X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam.h;h=376e3246318f84cb28166f3052ad0b0aa96d375a;hb=48a039bc3586634215c5aa6a3bb689adb1038af8;hp=fb71a49b46dafea0512a99e157735104cd3832a3;hpb=a958954399757774ee26bfcf0b5b95e9ec9b62f4;p=samtools.git diff --git a/bam.h b/bam.h index fb71a49..376e324 100644 --- a/bam.h +++ b/bam.h @@ -1,6 +1,6 @@ /* The MIT License - Copyright (c) 2008 Genome Research Ltd (GRL). + Copyright (c) 2008-2010 Genome Research Ltd (GRL). Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the @@ -33,13 +33,15 @@ BAM library provides I/O and various operations on manipulating files in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map) - format. It now supports importing from or exporting to TAM, sorting, + format. It now supports importing from or exporting to SAM, sorting, merging, generating pileup, and quickly retrieval of reads overlapped with a specified region. @copyright Genome Research Ltd. */ +#define BAM_VERSION "0.1.18 (r982:295)" + #include #include #include @@ -132,20 +134,32 @@ typedef struct { /* CIGAR operations. */ -/*! @abstract CIGAR: match */ +/*! @abstract CIGAR: M = match or mismatch*/ #define BAM_CMATCH 0 -/*! @abstract CIGAR: insertion to the reference */ +/*! @abstract CIGAR: I = insertion to the reference */ #define BAM_CINS 1 -/*! @abstract CIGAR: deletion from the reference */ +/*! @abstract CIGAR: D = deletion from the reference */ #define BAM_CDEL 2 -/*! @abstract CIGAR: skip on the reference (e.g. spliced alignment) */ +/*! @abstract CIGAR: N = skip on the reference (e.g. spliced alignment) */ #define BAM_CREF_SKIP 3 -/*! @abstract CIGAR: clip on the read with clipped sequence present in qseq */ +/*! @abstract CIGAR: S = clip on the read with clipped sequence + present in qseq */ #define BAM_CSOFT_CLIP 4 -/*! @abstract CIGAR: clip on the read with clipped sequence trimmed off */ +/*! @abstract CIGAR: H = clip on the read with clipped sequence trimmed off */ #define BAM_CHARD_CLIP 5 -/*! @abstract CIGAR: padding */ +/*! @abstract CIGAR: P = padding */ #define BAM_CPAD 6 +/*! @abstract CIGAR: equals = match */ +#define BAM_CEQUAL 7 +/*! @abstract CIGAR: X = mismatch */ +#define BAM_CDIFF 8 + +#define BAM_CIGAR_STR "MIDNSHP=X" + +#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK) +#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT) +#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)]) +#define bam_cigar_gen(o, l) ((o)<data + (b)->core.n_cigar*4 + (b)->core.l_qname + ((b)->core.l_qseq + 1)/2) +#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1)) /*! @function @abstract Get a base on read @@ -262,6 +276,12 @@ typedef struct __bam_iter_t *bam_iter_t; */ extern int bam_is_be; +/*! + @abstract Verbose level between 0 and 3; 0 is supposed to disable all + debugging information, though this may not have been implemented. + */ +extern int bam_verbose; + /*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */ extern unsigned char bam_nt16_table[256]; @@ -329,6 +349,7 @@ extern "C" { be destroyed in the first place. */ int sam_header_parse(bam_header_t *h); + int32_t bam_get_tid(const bam_header_t *header, const char *seq_name); /*! @abstract Parse @RG lines a update a header struct @@ -454,6 +475,21 @@ extern "C" { char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of); + /*! + @abstract Check whether a BAM record is plausibly valid + @param header associated header structure, or NULL if unavailable + @param b alignment to validate + @return 0 if the alignment is invalid; non-zero otherwise + + @discussion Simple consistency check of some of the fields of the + alignment record. If the header is provided, several additional checks + are made. Not all fields are checked, so a non-zero result is not a + guarantee that the record is valid. However it is usually good enough + to detect when bam_seek() has been called with a virtual file offset + that is not the offset of an alignment record. + */ + int bam_validate1(const bam_header_t *header, const bam1_t *b); + const char *bam_get_library(bam_header_t *header, const bam1_t *b); @@ -479,7 +515,7 @@ extern "C" { bam1_t *b; int32_t qpos; int indel, level; - uint32_t is_del:1, is_head:1, is_tail:1; + uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28; } bam_pileup1_t; typedef int (*bam_plp_auto_f)(void *data, bam1_t *b); @@ -492,6 +528,7 @@ extern "C" { const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); void bam_plp_set_mask(bam_plp_t iter, int mask); + void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt); void bam_plp_reset(bam_plp_t iter); void bam_plp_destroy(bam_plp_t iter); @@ -500,6 +537,7 @@ extern "C" { bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data); void bam_mplp_destroy(bam_mplp_t iter); + void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt); int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp); /*! @typedef @@ -692,8 +730,8 @@ static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) { uint8_t *data = bdst->data; int m_data = bdst->m_data; // backup data and m_data - if (m_data < bsrc->m_data) { // double the capacity - m_data = bsrc->m_data; kroundup32(m_data); + if (m_data < bsrc->data_len) { // double the capacity + m_data = bsrc->data_len; kroundup32(m_data); data = (uint8_t*)realloc(data, m_data); } memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data @@ -720,4 +758,13 @@ static inline bam1_t *bam_dup1(const bam1_t *src) return b; } +static inline int bam_aux_type2size(int x) +{ + if (x == 'C' || x == 'c' || x == 'A') return 1; + else if (x == 'S' || x == 's') return 2; + else if (x == 'I' || x == 'i' || x == 'f') return 4; + else return 0; +} + + #endif