X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam.h;h=eef2ea9853f25966c5a40c45a86272b78a507fb2;hb=8da27db044a4bba830d0340540df501758aa1d03;hp=21a7598d8e9ac81ac994dc13ea6adf4161279513;hpb=bc8511255e4a4ad7eb3233c4e9b65dfbe70a87ed;p=samtools.git diff --git a/bam.h b/bam.h index 21a7598..eef2ea9 100644 --- a/bam.h +++ b/bam.h @@ -1,6 +1,6 @@ /* The MIT License - Copyright (c) 2008 Genome Research Ltd (GRL). + Copyright (c) 2008-2010 Genome Research Ltd (GRL). Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -190,7 +190,7 @@ typedef struct { uint8_t *data; } bam1_t; -typedef struct __bam_iterf_t *bam_iterf_t; +typedef struct __bam_iter_t *bam_iter_t; #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0) #define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0) @@ -230,7 +230,7 @@ typedef struct __bam_iterf_t *bam_iterf_t; @param b pointer to an alignment @return pointer to quality string */ -#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + ((b)->core.l_qseq + 1)/2) +#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1)) /*! @function @abstract Get a base on read @@ -274,6 +274,10 @@ extern char bam_nt16_nt4_table[]; extern "C" { #endif + /********************* + * Low-level SAM I/O * + *********************/ + /*! @abstract TAM file handler */ typedef struct __tamFile_t *tamFile; @@ -325,6 +329,7 @@ extern "C" { be destroyed in the first place. */ int sam_header_parse(bam_header_t *h); + int32_t bam_get_tid(const bam_header_t *header, const char *seq_name); /*! @abstract Parse @RG lines a update a header struct @@ -338,12 +343,22 @@ extern "C" { #define sam_write1(header, b) bam_view1(header, b) + + /******************************** + * APIs for string dictionaries * + ********************************/ + int bam_strmap_put(void *strmap, const char *rg, const char *lib); const char *bam_strmap_get(const void *strmap, const char *rg); void *bam_strmap_dup(const void*); void *bam_strmap_init(); void bam_strmap_destroy(void *strmap); + + /********************* + * Low-level BAM I/O * + *********************/ + /*! @abstract Initialize a header structure. @return the pointer to the header structure @@ -440,8 +455,28 @@ extern "C" { char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of); + /*! + @abstract Check whether a BAM record is plausibly valid + @param header associated header structure, or NULL if unavailable + @param b alignment to validate + @return 0 if the alignment is invalid; non-zero otherwise + + @discussion Simple consistency check of some of the fields of the + alignment record. If the header is provided, several additional checks + are made. Not all fields are checked, so a non-zero result is not a + guarantee that the record is valid. However it is usually good enough + to detect when bam_seek() has been called with a virtual file offset + that is not the offset of an alignment record. + */ + int bam_validate1(const bam_header_t *header, const bam1_t *b); + const char *bam_get_library(bam_header_t *header, const bam1_t *b); + + /*************** + * pileup APIs * + ***************/ + /*! @typedef @abstract Structure for one alignment covering the pileup position. @field b pointer to the alignment @@ -460,19 +495,31 @@ extern "C" { bam1_t *b; int32_t qpos; int indel, level; - uint32_t is_del:1, is_head:1, is_tail:1; + uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28; } bam_pileup1_t; + typedef int (*bam_plp_auto_f)(void *data, bam1_t *b); + struct __bam_plp_t; typedef struct __bam_plp_t *bam_plp_t; - bam_plp_t bam_plp_init(); + bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data); int bam_plp_push(bam_plp_t iter, const bam1_t *b); - const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_n_plp, int *_tid, int *_pos); + const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); + const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); void bam_plp_set_mask(bam_plp_t iter, int mask); + void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt); void bam_plp_reset(bam_plp_t iter); void bam_plp_destroy(bam_plp_t iter); + struct __bam_mplp_t; + typedef struct __bam_mplp_t *bam_mplp_t; + + bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data); + void bam_mplp_destroy(bam_mplp_t iter); + void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt); + int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp); + /*! @typedef @abstract Type of function to be called by bam_plbuf_push(). @param tid chromosome ID as is defined in the header @@ -512,6 +559,11 @@ extern "C" { /*! @abstract bam_plbuf_push() equivalent with level calculated. */ int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf); + + /********************* + * BAM indexing APIs * + *********************/ + struct __bam_index_t; typedef struct __bam_index_t bam_index_t; @@ -560,9 +612,9 @@ extern "C" { */ int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func); - bam_iterf_t bam_iterf_query(const bam_index_t *idx, int tid, int beg, int end); - int bam_iterf_read(bamFile fp, bam_iterf_t iter, bam1_t *b); - void bam_iterf_destroy(bam_iterf_t iter); + bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end); + int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b); + void bam_iter_destroy(bam_iter_t iter); /*! @abstract Parse a region in the format: "chr2:100,000-200,000". @@ -576,6 +628,11 @@ extern "C" { */ int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end); + + /************************** + * APIs for optional tags * + **************************/ + /*! @abstract Retrieve data of a tag @param b pointer to an alignment struct @@ -599,6 +656,11 @@ extern "C" { void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data); uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get() + + /***************** + * Miscellaneous * + *****************/ + /*! @abstract Calculate the rightmost coordinate of an alignment on the reference genome. @@ -648,8 +710,8 @@ static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) { uint8_t *data = bdst->data; int m_data = bdst->m_data; // backup data and m_data - if (m_data < bsrc->m_data) { // double the capacity - m_data = bsrc->m_data; kroundup32(m_data); + if (m_data < bsrc->data_len) { // double the capacity + m_data = bsrc->data_len; kroundup32(m_data); data = (uint8_t*)realloc(data, m_data); } memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data