/* The MIT License
- Copyright (c) 2008 Genome Research Ltd (GRL).
+ Copyright (c) 2008-2010 Genome Research Ltd (GRL).
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
@field n_targets number of reference sequences
@field target_name names of the reference sequences
@field target_len lengths of the reference sequences
+ @field dict header dictionary
@field hash hash table for fast name lookup
@field rg2lib hash table for @RG-ID -> LB lookup
@field l_text length of the plain text in the header
int32_t n_targets;
char **target_name;
uint32_t *target_len;
- void *hash, *rg2lib;
- int l_text;
+ void *dict, *hash, *rg2lib;
+ size_t l_text, n_text;
char *text;
} bam_header_t;
/*! @abstract optical or PCR duplicate */
#define BAM_FDUP 1024
+#define BAM_OFDEC 0 /* NOTE(review): the BAM_OF* values look like output-format selectors for the `of` argument of bam_format1_core() -- confirm */
+#define BAM_OFHEX 1 /* NOTE(review): presumably the hexadecimal variant -- confirm */
+#define BAM_OFSTR 2 /* NOTE(review): presumably the string variant -- confirm */
+
/*! @abstract default mask for pileup */
#define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
uint8_t *data;
} bam1_t;
+typedef struct __bam_iter_t *bam_iter_t;
+
#define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0)
#define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
@param b pointer to an alignment
@return pointer to quality string
*/
-#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + ((b)->core.l_qseq + 1)/2)
+#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
/*! @function
@abstract Get a base on read
extern "C" {
#endif
+ /*********************
+ * Low-level SAM I/O *
+ *********************/
+
/*! @abstract TAM file handler */
typedef struct __tamFile_t *tamFile;
be destroyed in the first place.
*/
int sam_header_parse(bam_header_t *h);
+ int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
/*!
@abstract Parse @RG lines and update a header struct
#define sam_write1(header, b) bam_view1(header, b)
+
+ /********************************
+ * APIs for string dictionaries *
+ ********************************/
+
int bam_strmap_put(void *strmap, const char *rg, const char *lib);
const char *bam_strmap_get(const void *strmap, const char *rg);
void *bam_strmap_dup(const void*);
void *bam_strmap_init();
void bam_strmap_destroy(void *strmap);
+
+ /*********************
+ * Low-level BAM I/O *
+ *********************/
+
/*!
@abstract Initialize a header structure.
@return the pointer to the header structure
@abstract Free the memory allocated for an alignment.
@param b pointer to an alignment
*/
-#define bam_destroy1(b) do { \
- free((b)->data); free(b); \
+#define bam_destroy1(b) do { /* frees (b)->data, then b itself; note that b is evaluated more than once */ \
+ if (b) { free((b)->data); free(b); } /* guard: a NULL b is skipped, so (b)->data is never dereferenced through NULL */ \
} while (0)
/*!
*/
char *bam_format1(const bam_header_t *header, const bam1_t *b);
- char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int is_hex);
+ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of);
+
+ /*!
+ @abstract Check whether a BAM record is plausibly valid
+ @param header associated header structure, or NULL if unavailable
+ @param b alignment to validate
+ @return 0 if the alignment is invalid; non-zero otherwise
+
+ @discussion Simple consistency check of some of the fields of the
+ alignment record. If the header is provided, several additional checks
+ are made. Not all fields are checked, so a non-zero result is not a
+ guarantee that the record is valid. However it is usually good enough
+ to detect when bam_seek() has been called with a virtual file offset
+ that is not the offset of an alignment record.
+ */
+ int bam_validate1(const bam_header_t *header, const bam1_t *b);
+
+ const char *bam_get_library(bam_header_t *header, const bam1_t *b);
+
+
+ /***************
+ * pileup APIs *
+ ***************/
/*! @typedef
@abstract Structure for one alignment covering the pileup position.
bam1_t *b;
int32_t qpos;
int indel, level;
- uint32_t is_del:1, is_head:1, is_tail:1;
+ uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
} bam_pileup1_t;
- struct __bam_plbuf_t;
- /*! @abstract pileup buffer */
- typedef struct __bam_plbuf_t bam_plbuf_t;
+ typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
- void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
+ struct __bam_plp_t;
+ typedef struct __bam_plp_t *bam_plp_t;
+
+ bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
+ int bam_plp_push(bam_plp_t iter, const bam1_t *b);
+ const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
+ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
+ void bam_plp_set_mask(bam_plp_t iter, int mask);
+ void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
+ void bam_plp_reset(bam_plp_t iter);
+ void bam_plp_destroy(bam_plp_t iter);
+
+ struct __bam_mplp_t;
+ typedef struct __bam_mplp_t *bam_mplp_t;
+
+ bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
+ void bam_mplp_destroy(bam_mplp_t iter);
+ void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
+ int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
/*! @typedef
@abstract Type of function to be called by bam_plbuf_push().
*/
typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
- /*!
- @abstract Reset a pileup buffer for another pileup process
- @param buf the pileup buffer to be reset
- */
- void bam_plbuf_reset(bam_plbuf_t *buf);
+ typedef struct { /* pileup buffer: bundles an iterator handle with a callback and its data pointer */
+ bam_plp_t iter; /* pileup iterator handle (type returned by bam_plp_init()) */
+ bam_pileup_f func; /* callback with the bam_pileup_f signature */
+ void *data; /* NOTE(review): presumably forwarded to func as its `data` argument -- confirm against the implementation */
+ } bam_plbuf_t;
- /*!
- @abstract Initialize a buffer for pileup.
- @param func fucntion to be called by bam_pileup_core()
- @param data user provided data
- @return pointer to the pileup buffer
- */
+ void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
+ void bam_plbuf_reset(bam_plbuf_t *buf);
bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data);
-
- /*!
- @abstract Destroy a pileup buffer.
- @param buf pointer to the pileup buffer
- */
void bam_plbuf_destroy(bam_plbuf_t *buf);
-
- /*!
- @abstract Push an alignment to the pileup buffer.
- @param b alignment to be pushed
- @param buf pileup buffer
- @see bam_plbuf_init()
- @return always 0 currently
-
- @discussion If all the alignments covering a particular site have
- been collected, this function will call the user defined function
- as is provided to bam_plbuf_init(). The coordinate of the site and
- all the alignments will be transferred to the user defined
- function as function parameters.
-
- When all the alignments are pushed to the buffer, this function
- needs to be called with b equal to NULL. This will flush the
- buffer. A pileup buffer can only be reused when bam_plbuf_reset()
- is called.
- */
int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf);
int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data);
/*! @abstract bam_plbuf_push() equivalent with level calculated. */
int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf);
+
+ /*********************
+ * BAM indexing APIs *
+ *********************/
+
struct __bam_index_t;
typedef struct __bam_index_t bam_index_t;
*/
int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func);
+ bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end);
+ int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);
+ void bam_iter_destroy(bam_iter_t iter);
+
/*!
@abstract Parse a region in the format: "chr2:100,000-200,000".
@discussion bam_header_t::hash will be initialized if empty.
*/
int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);
+
+ /**************************
+ * APIs for optional tags *
+ **************************/
+
/*!
@abstract Retrieve data of a tag
@param b pointer to an alignment struct
void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
+
+ /*****************
+ * Miscellaneous *
+ *****************/
+
/*!
@abstract Calculate the rightmost coordinate of an alignment on the
reference genome.
*/
int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar);
- typedef struct {
- int32_t qbeg, qend;
- int32_t tbeg, tend;
- int32_t cbeg, cend;
- } bam_segreg_t;
-
- int bam_segreg(int32_t pos, const bam1_core_t *c, const uint32_t *cigar, bam_segreg_t *reg);
-
#ifdef __cplusplus
}
#endif
{
uint8_t *data = bdst->data;
int m_data = bdst->m_data; // backup data and m_data
- if (m_data < bsrc->m_data) { // double the capacity
- m_data = bsrc->m_data; kroundup32(m_data);
+ if (m_data < bsrc->data_len) { // double the capacity
+ m_data = bsrc->data_len; kroundup32(m_data);
data = (uint8_t*)realloc(data, m_data);
}
memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data