X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam.h;h=a3305e3cf4b758edaaeaf201ea4fbc87ab8f919b;hb=50be08b2de946accc4c15aab2ec45ac86f30b09f;hp=346c750c8c807c44ea32804eeb1bb93411d0b71a;hpb=4efe4317c377019fbb64bcfae519403a3a9d1f5f;p=samtools.git diff --git a/bam.h b/bam.h index 346c750..a3305e3 100644 --- a/bam.h +++ b/bam.h @@ -40,7 +40,7 @@ @copyright Genome Research Ltd. */ -#define BAM_VERSION "0.1.18 (r982:295)" +#define BAM_VERSION "0.1.18-r580" #include #include @@ -89,7 +89,7 @@ typedef struct { char **target_name; uint32_t *target_len; void *dict, *hash, *rg2lib; - size_t l_text, n_text; + uint32_t l_text, n_text; char *text; } bam_header_t; @@ -150,15 +150,24 @@ typedef struct { /*! @abstract CIGAR: P = padding */ #define BAM_CPAD 6 /*! @abstract CIGAR: equals = match */ -#define BAM_CEQUAL 7 +#define BAM_CEQUAL 7 /*! @abstract CIGAR: X = mismatch */ -#define BAM_CDIFF 8 +#define BAM_CDIFF 8 +#define BAM_CBACK 9 + +#define BAM_CIGAR_STR "MIDNSHP=XB" +#define BAM_CIGAR_TYPE 0x3C1A7 + +#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK) +#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT) +#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)]) +#define bam_cigar_gen(l, o) ((l)<>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference /*! @typedef @abstract Structure for core alignment information. @field tid chromosome ID, defined by bam_header_t @field pos 0-based leftmost coordinate - @field strand strand; 0 for forward and 1 otherwise @field bin bin calculated by bam_reg2bin() @field qual mapping quality @field l_qname length of the query name @@ -183,13 +192,15 @@ typedef struct { @field l_aux length of auxiliary data @field data_len current length of bam1_t::data @field m_data maximum length of bam1_t::data - @field data all variable-length data, concatenated; structure: cigar-qname-seq-qual-aux + @field data all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux @discussion Notes: 1. qname is zero tailing and core.l_qname includes the tailing '\0'. 2. l_qseq is calculated from the total length of an alignment block on reading or from CIGAR. + 3. cigar data is encoded 4 bytes per CIGAR operation. + 4. seq is nybble-encoded according to bam_nt16_table. */ typedef struct { bam1_core_t core; @@ -245,7 +256,10 @@ typedef struct __bam_iter_t *bam_iter_t; @param i The i-th position, 0-based @return 4-bit integer representing the base. */ -#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf) +//#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf) +#define bam1_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf) + +#define bam1_seq_seti(s, i, c) ( (s)[(i)>>1] = ((s)[(i)>>1] & 0xf<<(((i)&1)<<2)) | (c)<<((~(i)&1)<<2) ) /*! @function @abstract Get query sequence and quality @@ -275,6 +289,8 @@ extern int bam_is_be; */ extern int bam_verbose; +extern int bam_no_B; + /*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */ extern unsigned char bam_nt16_table[256]; @@ -420,6 +436,8 @@ extern "C" { */ int bam_read1(bamFile fp, bam1_t *b); + int bam_remove_B(bam1_t *b); + /*! @abstract Write an alignment to BAM. @param fp BAM file handler @@ -755,9 +773,21 @@ static inline int bam_aux_type2size(int x) { if (x == 'C' || x == 'c' || x == 'A') return 1; else if (x == 'S' || x == 's') return 2; - else if (x == 'I' || x == 'i' || x == 'f') return 4; + else if (x == 'I' || x == 'i' || x == 'f' || x == 'F') return 4; else return 0; } +/********************************* + *** Compatibility with htslib *** + *********************************/ + +typedef bam_header_t bam_hdr_t; + +#define bam_get_qname(b) bam1_qname(b) +#define bam_get_cigar(b) bam1_cigar(b) + +#define bam_hdr_read(fp) bam_header_read(fp) +#define bam_hdr_write(fp, h) bam_header_write(fp, h) +#define bam_hdr_destroy(fp) bam_header_destroy(fp) #endif