From 1a23a35867ee992dcffcf3d17df2b7d41732f33b Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 13 Jun 2009 12:00:08 +0000 Subject: [PATCH] * samtools-0.1.4-10 (r341) * only include key APIs in libbam.a * move color-specific routines to bam_color.c * update documentations * remove the support of -q in pileup --- Makefile | 21 ++++---- bam.h | 153 +++++++++++++++++++++++++++------------------------- bam_aux.c | 104 ----------------------------------- bam_color.c | 127 +++++++++++++++++++++++++++++++++++++++++++ bam_plcmd.c | 8 ++- bam_sort.c | 24 +++++++++ bam_tview.c | 4 ++ bamtk.c | 2 +- sam.c | 5 +- sam.h | 67 ++++++++++++++++++++++- 10 files changed, 317 insertions(+), 198 deletions(-) create mode 100644 bam_color.c diff --git a/Makefile b/Makefile index 724552f..9bf6d80 100644 --- a/Makefile +++ b/Makefile @@ -2,12 +2,13 @@ CC= gcc CXX= g++ CFLAGS= -g -Wall -O2 #-m64 #-arch ppc CXXFLAGS= $(CFLAGS) -DFLAGS= -D_IOLIB=2 -D_FILE_OFFSET_BITS=64 #-D_NO_CURSES -OBJS= bam.o bam_import.o bam_pileup.o bam_lpileup.o bam_sort.o bam_index.o \ - razf.o bgzf.o faidx.o bam_tview.o bam_maqcns.o bam_aux.o bam_plcmd.o \ - bam_mate.o bam_rmdup.o glf.o bam_stat.o kstring.o bam_md.o sam.o sam_view.o \ - bam_rmdupse.o -PROG= bgzip samtools +DFLAGS= -D_FILE_OFFSET_BITS=64 #-D_NO_CURSES +LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ + bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o +AOBJS= bam_sort.o bam_tview.o bam_maqcns.o bam_plcmd.o sam_view.o \ + bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ + bamtk.o +PROG= samtools bgzip INCLUDES= SUBDIRS= . misc @@ -30,12 +31,12 @@ all:$(PROG) lib:libbam.a -libbam.a:$(OBJS) - $(AR) -cru $@ $(OBJS) +libbam.a:$(LOBJS) + $(AR) -cru $@ $(LOBJS) ### For the curses library: comment out `-lcurses' if you do not have curses installed -samtools:lib bamtk.o - $(CC) $(CFLAGS) -o $@ bamtk.o -lm -L. -lbam -lcurses -lz +samtools:lib $(AOBJS) + $(CC) $(CFLAGS) -o $@ $(AOBJS) -lm -lcurses -lz -L. -lbam razip:razip.o razf.o $(CC) $(CFLAGS) -o $@ razf.o razip.o -lz diff --git a/bam.h b/bam.h index c635e72..6316c9b 100644 --- a/bam.h +++ b/bam.h @@ -32,7 +32,7 @@ @header BAM library provides I/O and various operations on manipulating files - in the BAM (Binary Alignment/Mapping) or TAM (Text Alignment/Mapping) + in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map) format. It now supports importing from or exporting to TAM, sorting, merging, generating pileup, and quickly retrieval of reads overlapped with a specified region. @@ -46,6 +46,8 @@ #include #include +#define _IOLIB 2 + #if _IOLIB == 1 && !defined(_NO_RAZF) #define BAM_TRUE_OFFSET #include "razf.h" @@ -90,6 +92,7 @@ typedef RAZF *bamFile; @field target_name names of the reference sequences @field target_len lengths of the referene sequences @field hash hash table for fast name lookup + @field rg2lib hash table for @RG-ID -> LB lookup @field l_text length of the plain text in the header @field text plain text @@ -113,14 +116,22 @@ typedef struct { #define BAM_FUNMAP 4 /*! @abstract the mate is unmapped */ #define BAM_FMUNMAP 8 +/*! @abstract the read is mapped to the reverse strand */ #define BAM_FREVERSE 16 +/*! @abstract the mate is mapped to the reverse strand */ #define BAM_FMREVERSE 32 +/*! @abstract this is read1 */ #define BAM_FREAD1 64 +/*! @abstract this is read2 */ #define BAM_FREAD2 128 +/*! @abstract not primary alignment */ #define BAM_FSECONDARY 256 +/*! @abstract QC failure */ #define BAM_FQCFAIL 512 +/*! @abstract optical or PCR duplicate */ #define BAM_FDUP 1024 +/*! @abstract defautl mask for pileup */ #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) #define BAM_CORE_SIZE sizeof(bam1_core_t) @@ -247,12 +258,6 @@ typedef struct { */ #define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2) -typedef struct { - int32_t qbeg, qend; - int32_t tbeg, tend; - int32_t cbeg, cend; -} bam_segreg_t; - #ifndef kroundup32 /*! @function @abstract Round an integer to the next closest power-2 integer. @@ -284,21 +289,21 @@ extern "C" { typedef struct __tamFile_t *tamFile; /*! - @abstract Open a TAM file, either uncompressed or compressed by gzip/zlib. - @param fn TAM file name - @return TAM file handler + @abstract Open a SAM file for reading, either uncompressed or compressed by gzip/zlib. + @param fn SAM file name + @return SAM file handler */ tamFile sam_open(const char *fn); /*! - @abstract Close a TAM file handler - @param fp TAM file handler + @abstract Close a SAM file handler + @param fp SAM file handler */ void sam_close(tamFile fp); /*! - @abstract Read one alignment from a TAM file handler - @param fp TAM file handler + @abstract Read one alignment from a SAM file handler + @param fp SAM file handler @param header header information (ordered names of chromosomes) @param b read alignment; all members in b will be updated @return 0 if successful; otherwise negative @@ -315,8 +320,31 @@ extern "C" { */ bam_header_t *sam_header_read2(const char *fn_list); + /*! + @abstract Read header from a SAM file (if present) + @param fp SAM file handler + @return pointer to header struct; 0 if no @SQ lines available + */ bam_header_t *sam_header_read(tamFile fp); + + /*! + @abstract Parse @SQ lines a update a header struct + @param h pointer to the header struct to be updated + @return number of target sequences + + @discussion bam_header_t::{n_targets,target_len,target_name} will + be destroyed in the first place. + */ int sam_header_parse(bam_header_t *h); + + /*! + @abstract Parse @RG lines a update a header struct + @param h pointer to the header struct to be updated + @return number of @RG lines + + @discussion bam_header_t::rg2lib will be destroyed in the first + place. + */ int sam_header_parse_rg(bam_header_t *h); #define sam_write1(header, b) bam_view1(header, b) @@ -421,34 +449,6 @@ extern "C" { */ char *bam_format1(const bam_header_t *header, const bam1_t *b); - /*! - @abstract Merge multiple sorted BAM. - @param is_by_qname whether to sort by query name - @param out output BAM file name - @param n number of files to be merged - @param fn names of files to be merged - - @discussion Padding information may NOT correctly maintained. This - function is NOT thread safe. - */ - void bam_merge_core(int is_by_qname, const char *out, int n, char * const *fn); - - /*! - @abstract Sort an unsorted BAM file based on the chromosome order - and the leftmost position of an alignment - - @param is_by_qname whether to sort by query name - @param fn name of the file to be sorted - @param prefix prefix of the output and the temporary files; upon - sucessess, prefix.bam will be written. - @param max_mem approxiate maximum memory (very inaccurate) - - @discussion It may create multiple temporary subalignment files - and then merge them by calling bam_merge_core(). This function is - NOT thread safe. - */ - void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem); - /*! @typedef @abstract Structure for one alignment covering the pileup position. @field b pointer to the alignment @@ -487,6 +487,10 @@ extern "C" { */ typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); + /*! + @abstract Reset a pileup buffer for another pileup process + @param buf the pileup buffer to be reset + */ void bam_plbuf_reset(bam_plbuf_t *buf); /*! @@ -512,13 +516,14 @@ extern "C" { @discussion If all the alignments covering a particular site have been collected, this function will call the user defined function - as is provided to bam_plbuf_init(). The coordinate of the site the + as is provided to bam_plbuf_init(). The coordinate of the site and all the alignments will be transferred to the user defined function as function parameters. When all the alignments are pushed to the buffer, this function needs to be called with b equal to NULL. This will flush the - buffer. A pileup buffer cannot be reused. + buffer. A pileup buffer can only be reused when bam_plbuf_reset() + is called. */ int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf); @@ -598,43 +603,28 @@ extern "C" { */ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end); - void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data); - // uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get() + /*! + @abstract Retrieve data of a tag + @param b pointer to an alignment struct + @param tag two-character tag to be retrieved + + @return pointer to the type and data. The first character is the + type that can be 'iIsScCdfAZH'. + + @discussion Use bam_aux2?() series to convert the returned data to + the corresponding type. + */ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]); + int32_t bam_aux2i(const uint8_t *s); float bam_aux2f(const uint8_t *s); double bam_aux2d(const uint8_t *s); char bam_aux2A(const uint8_t *s); char *bam_aux2Z(const uint8_t *s); - /*! - @abstract Get the color encoding the previous and current base - @param b pointer to an alignment - @param i The i-th position, 0-based - @return color - - @discussion Returns 0 no color information is found. - */ - char bam_aux_getCSi(bam1_t *b, int i); - - /*! - @abstract Get the color quality of the color encoding the previous and current base - @param b pointer to an alignment - @param i The i-th position, 0-based - @return color quality - - @discussion Returns 0 no color information is found. - */ - char bam_aux_getCQi(bam1_t *b, int i); - - /*! - @abstract Get the color error profile at the give position - @param b pointer to an alignment - @return the original color if the color was an error, '-' (dash) otherwise + void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data); - @discussion Returns 0 no color information is found. - */ - char bam_aux_getCEi(bam1_t *b, int i); + // uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get() /*! @abstract Calculate the rightmost coordinate of an alignment on the @@ -654,6 +644,12 @@ extern "C" { */ int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar); + typedef struct { + int32_t qbeg, qend; + int32_t tbeg, tend; + int32_t cbeg, cend; + } bam_segreg_t; + int bam_segreg(int32_t pos, const bam1_core_t *c, const uint32_t *cigar, bam_segreg_t *reg); #ifdef __cplusplus @@ -677,6 +673,12 @@ static inline int bam_reg2bin(uint32_t beg, uint32_t end) return 0; } +/*! + @abstract Copy an alignment + @param bdst destination alignment struct + @param bsrc source alignment struct + @return pointer to the destination alignment struct + */ static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) { uint8_t *data = bdst->data; @@ -693,6 +695,11 @@ static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) return bdst; } +/*! + @abstract Duplicate an alignment + @param src source alignment struct + @return pointer to the destination alignment struct + */ static inline bam1_t *bam_dup1(const bam1_t *src) { bam1_t *b; diff --git a/bam_aux.c b/bam_aux.c index f36bc23..a63e2ae 100644 --- a/bam_aux.c +++ b/bam_aux.c @@ -226,107 +226,3 @@ void bam_strmap_destroy(void *rg2lib) } kh_destroy(r2l, h); } - -/*** The following routines were implemented by Nils Homer for color-space support in tview ***/ - -char bam_aux_getCSi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) i = strlen(cs) - 1 - i; - else i++; - return cs[i]; -} - -char bam_aux_getCQi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CQ"); - char *cq = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cq = bam_aux2Z(c); - // adjust for strandedness - if(bam1_strand(b)) i = strlen(cq) - 1 - i; - return cq[i]; -} - -char bam_aux_nt2int(char a) -{ - switch(toupper(a)) { - case 'A': - return 0; - break; - case 'C': - return 1; - break; - case 'G': - return 2; - break; - case 'T': - return 3; - break; - default: - return 4; - break; - } -} - -char bam_aux_ntnt2cs(char a, char b) -{ - a = bam_aux_nt2int(a); - b = bam_aux_nt2int(b); - if(4 == a || 4 == b) return '4'; - return "0123"[(int)(a ^ b)]; -} - -char bam_aux_getCEi(bam1_t *b, int i) -{ - int cs_i; - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - char prev_b, cur_b; - char cur_color, cor_color; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) { //reverse strand - cs_i = strlen(cs) - 1 - i; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == cs_i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - else { - cs_i=i+1; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - - // corrected color - cor_color = bam_aux_ntnt2cs(prev_b, cur_b); - - if(cur_color == cor_color) { - return '-'; - } - else { - return cur_color; - } -} diff --git a/bam_color.c b/bam_color.c new file mode 100644 index 0000000..75aedd6 --- /dev/null +++ b/bam_color.c @@ -0,0 +1,127 @@ +#include +#include "bam.h" + +/*! + @abstract Get the color encoding the previous and current base + @param b pointer to an alignment + @param i The i-th position, 0-based + @return color + + @discussion Returns 0 no color information is found. + */ +char bam_aux_getCSi(bam1_t *b, int i) +{ + uint8_t *c = bam_aux_get(b, "CS"); + char *cs = NULL; + + // return the base if the tag was not found + if(0 == c) return 0; + + cs = bam_aux2Z(c); + // adjust for strandedness and leading adaptor + if(bam1_strand(b)) i = strlen(cs) - 1 - i; + else i++; + return cs[i]; +} + +/*! + @abstract Get the color quality of the color encoding the previous and current base + @param b pointer to an alignment + @param i The i-th position, 0-based + @return color quality + + @discussion Returns 0 no color information is found. + */ +char bam_aux_getCQi(bam1_t *b, int i) +{ + uint8_t *c = bam_aux_get(b, "CQ"); + char *cq = NULL; + + // return the base if the tag was not found + if(0 == c) return 0; + + cq = bam_aux2Z(c); + // adjust for strandedness + if(bam1_strand(b)) i = strlen(cq) - 1 - i; + return cq[i]; +} + +char bam_aux_nt2int(char a) +{ + switch(toupper(a)) { + case 'A': + return 0; + break; + case 'C': + return 1; + break; + case 'G': + return 2; + break; + case 'T': + return 3; + break; + default: + return 4; + break; + } +} + +char bam_aux_ntnt2cs(char a, char b) +{ + a = bam_aux_nt2int(a); + b = bam_aux_nt2int(b); + if(4 == a || 4 == b) return '4'; + return "0123"[(int)(a ^ b)]; +} + +/*! + @abstract Get the color error profile at the give position + @param b pointer to an alignment + @return the original color if the color was an error, '-' (dash) otherwise + + @discussion Returns 0 no color information is found. + */ +char bam_aux_getCEi(bam1_t *b, int i) +{ + int cs_i; + uint8_t *c = bam_aux_get(b, "CS"); + char *cs = NULL; + char prev_b, cur_b; + char cur_color, cor_color; + + // return the base if the tag was not found + if(0 == c) return 0; + + cs = bam_aux2Z(c); + + // adjust for strandedness and leading adaptor + if(bam1_strand(b)) { //reverse strand + cs_i = strlen(cs) - 1 - i; + // get current color + cur_color = cs[cs_i]; + // get previous base + prev_b = (0 == cs_i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; + // get current base + cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; + } + else { + cs_i=i+1; + // get current color + cur_color = cs[cs_i]; + // get previous base + prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; + // get current base + cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; + } + + // corrected color + cor_color = bam_aux_ntnt2cs(prev_b, cur_b); + + if(cur_color == cor_color) { + return '-'; + } + else { + return cur_color; + } +} diff --git a/bam_plcmd.c b/bam_plcmd.c index 96f5c45..46f078a 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -26,7 +26,7 @@ typedef struct { khash_t(64) *hash; uint32_t format; int tid, len, last_pos; - int mask, min_mapQ; + int mask; char *ref; glfFile fp_glf; // for glf output only } pu_data_t; @@ -280,7 +280,7 @@ int bam_pileup(int argc, char *argv[]) d->tid = -1; d->mask = BAM_DEF_MASK; d->c = bam_maqcns_init(); d->ido = bam_maqindel_opt_init(); - while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:q:")) >= 0) { + while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:")) >= 0) { switch (c) { case 's': d->format |= BAM_PLF_SIMPLE; break; case 't': fn_list = strdup(optarg); break; @@ -297,7 +297,6 @@ int bam_pileup(int argc, char *argv[]) case 'g': d->format |= BAM_PLF_GLF; break; case 'I': d->ido->q_indel = atoi(optarg); break; case 'G': d->ido->r_indel = atof(optarg); break; - case 'q': d->min_mapQ = atoi(optarg); break; default: fprintf(stderr, "Unrecognizd option '-%c'.\n", c); return 1; } } @@ -307,7 +306,6 @@ int bam_pileup(int argc, char *argv[]) fprintf(stderr, "Option: -s simple (yet incomplete) pileup format\n"); fprintf(stderr, " -i only show lines/consensus with indels\n"); fprintf(stderr, " -m INT filtering reads with bits in INT [%d]\n", d->mask); - fprintf(stderr, " -q INT filtering reads with mapping quality smaller than INT [%d]\n", d->min_mapQ); fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", d->c->cap_mapQ); fprintf(stderr, " -t FILE list of reference sequences (assume the input is in SAM)\n"); fprintf(stderr, " -l FILE list of sites at which pileup is output\n"); @@ -344,7 +342,7 @@ int bam_pileup(int argc, char *argv[]) } d->h = fp->header; if (fn_pos) d->hash = load_pos(fn_pos, d->h); - sampileup(fp, d->mask, d->min_mapQ, pileup_func, d); + sampileup(fp, d->mask, pileup_func, d); samclose(fp); // d->h will be destroyed here } diff --git a/bam_sort.c b/bam_sort.c index 7c5fbb2..402792a 100644 --- a/bam_sort.c +++ b/bam_sort.c @@ -47,6 +47,16 @@ static inline int heap_lt(const heap1_t a, const heap1_t b) KSORT_INIT(heap, heap1_t, heap_lt) +/*! + @abstract Merge multiple sorted BAM. + @param is_by_qname whether to sort by query name + @param out output BAM file name + @param n number of files to be merged + @param fn names of files to be merged + + @discussion Padding information may NOT correctly maintained. This + function is NOT thread safe. + */ void bam_merge_core(int by_qname, const char *out, int n, char * const *fn) { bamFile fpout, *fp; @@ -155,6 +165,20 @@ static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam bam_close(fp); } +/*! + @abstract Sort an unsorted BAM file based on the chromosome order + and the leftmost position of an alignment + + @param is_by_qname whether to sort by query name + @param fn name of the file to be sorted + @param prefix prefix of the output and the temporary files; upon + sucessess, prefix.bam will be written. + @param max_mem approxiate maximum memory (very inaccurate) + + @discussion It may create multiple temporary subalignment files + and then merge them by calling bam_merge_core(). This function is + NOT thread safe. + */ void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem) { int n, ret, k, i; diff --git a/bam_tview.c b/bam_tview.c index c26273c..60b7350 100644 --- a/bam_tview.c +++ b/bam_tview.c @@ -8,6 +8,10 @@ #include "faidx.h" #include "bam_maqcns.h" +char bam_aux_getCEi(bam1_t *b, int i); +char bam_aux_getCSi(bam1_t *b, int i); +char bam_aux_getCQi(bam1_t *b, int i); + #define TV_MIN_ALNROW 2 #define TV_MAX_GOTO 40 #define TV_LOW_MAPQ 10 diff --git a/bamtk.c b/bamtk.c index 54b19f3..ff5f642 100644 --- a/bamtk.c +++ b/bamtk.c @@ -3,7 +3,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.4-9 (r340)" +#define PACKAGE_VERSION "0.1.4-10 (r341)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/sam.c b/sam.c index 4c16b02..5b02abb 100644 --- a/sam.c +++ b/sam.c @@ -120,7 +120,7 @@ int samwrite(samfile_t *fp, const bam1_t *b) } } -int sampileup(samfile_t *fp, int mask, int min_mapQ, bam_pileup_f func, void *func_data) +int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data) { bam_plbuf_t *buf; int ret; @@ -129,8 +129,7 @@ int sampileup(samfile_t *fp, int mask, int min_mapQ, bam_pileup_f func, void *fu buf = bam_plbuf_init(func, func_data); bam_plbuf_set_mask(buf, mask); while ((ret = samread(fp, b)) >= 0) - if (b->core.qual >= min_mapQ) - bam_plbuf_push(b, buf); + bam_plbuf_push(b, buf); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); bam_destroy1(b); diff --git a/sam.h b/sam.h index 082fe96..970cf2d 100644 --- a/sam.h +++ b/sam.h @@ -3,6 +3,24 @@ #include "bam.h" +/*! + @header + + This file provides higher level of I/O routines and unifies the APIs + for SAM and BAM formats. These APIs are more convenient and + recommended. + + @copyright Genome Research Ltd. + */ + +/*! @typedef + @abstract SAM/BAM file handler + @field type type of the handler; bit 1 for BAM and bit 2 for reading + @field bam BAM file handler; valid if (type&1) == 1 + @field tamr SAM file handler for reading; valid if type == 2 + @field tamw SAM file handler for writing; valid if type == 0 + @field header header struct + */ typedef struct { int type; union { @@ -17,12 +35,57 @@ typedef struct { extern "C" { #endif - // mode can be: r/w/rb/wb. On writing, aux points to bam_header_t; on reading, aux points to the name of fn_list for SAM + /*! + @abstract Open a SAM/BAM file + + @param fn SAM/BAM file name; "-" is recognized as stdin (for + reading) or stdout (for writing). + + @param mode open mode /[rw](b?)(u?)(h?)/: 'r' for reading, 'w' for + writing, 'b' for BAM I/O, 'u' for uncompressed BAM output and 'h' + for outputing header in SAM. If 'b' present, it must immediately + follow 'r' or 'w'. Valid modes are "r", "w", "wh", "rb", "wb" and + "wbu" exclusively. + + @param aux auxiliary data; if mode[0]=='w', aux points to + bam_header_t; if strcmp(mode, "rb")==0 and @SQ header lines in SAM + are absent, aux points the file name of the list of the reference; + aux is not used otherwise. + + @return SAM/BAM file handler + */ samfile_t *samopen(const char *fn, const char *mode, const void *aux); + + /*! + @abstract Close a SAM/BAM handler + @param fp file handler to be closed + */ void samclose(samfile_t *fp); + + /*! + @abstract Read one alignment + @param fp file handler + @param b alignment + @return bytes read + */ int samread(samfile_t *fp, bam1_t *b); + + /*! + @abstract Write one alignment + @param fp file handler + @param b alignment + @return bytes written + */ int samwrite(samfile_t *fp, const bam1_t *b); - int sampileup(samfile_t *fp, int mask, int min_mapQ, bam_pileup_f func, void *func_data); + + /*! + @abstract Get the pileup for a whole alignment file + @param fp file handler + @param mask mask transferred to bam_plbuf_set_mask() + @param func user defined function called in the pileup process + #param data user provided data for func() + */ + int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data); #ifdef __cplusplus } -- 2.39.2