From 1cd91e74134d244084eb6c6b23b093c14c778c89 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 1 Apr 2011 03:17:03 +0000 Subject: [PATCH] Added an example of using mpileup --- Makefile | 2 +- bam2depth.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ bamtk.c | 3 ++ 3 files changed, 108 insertions(+), 1 deletion(-) create mode 100644 bam2depth.c diff --git a/Makefile b/Makefile index 2cfa085..4fc03d5 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ AOBJS= bam_tview.o bam_maqcns.o bam_plcmd.o sam_view.o \ bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \ - cut_target.o phase.o + cut_target.o phase.o bam2depth.o PROG= samtools INCLUDES= -I. SUBDIRS= . bcftools misc diff --git a/bam2depth.c b/bam2depth.c new file mode 100644 index 0000000..e1d1907 --- /dev/null +++ b/bam2depth.c @@ -0,0 +1,104 @@ +/* This program demonstrates how to generate pileup from multiple BAMs + * simutaneously, to achieve random access and to use the BED interface. + * To compile this program separately, you may: + * + * gcc -g -O2 -Wall -D_MAIN_BAM2BED bam2depth.c -L. -lbam -lz + */ +#include +#include +#include +#include +#include "bam.h" + +typedef struct { // auxiliary data structure + bamFile fp; // the file handler + bam_iter_t iter; // NULL if a region not specified +} aux_t; + +void *bed_read(const char *fn); // read a BED or position list file +void bed_destroy(void *_h); // destroy the BED data structure +int bed_overlap(const void *_h, const char *chr, int beg, int end); // test if chr:beg-end overlaps + +// This function reads a BAM alignment from one BAM file. +static int read_bam(void *data, bam1_t *b) +{ + aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure + return aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b); +} + +#ifdef _MAIN_BAM2BED +int main(int argc, char *argv[]) +#else +int main_depth(int argc, char *argv[]) +#endif +{ + int i, n, tid, beg, end, pos, *n_plp; + const bam_pileup1_t **plp; + char *reg = 0; // specified region + void *bed = 0; // BED data structure + bam_header_t *h = 0; // BAM header of the 1st input + aux_t **data; + bam_mplp_t mplp; + + // parse the command line + while ((n = getopt(argc, argv, "r:b:")) >= 0) { + switch (n) { + case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header + case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now + } + } + if (optind == argc) { + fprintf(stderr, "Usage: bam2depth [-r reg] [-b in.bed] [...]\n"); + return 1; + } + + // initialize the auxiliary data structures + n = argc - optind; // the number of BAMs on the command line + data = calloc(n, sizeof(void*)); + beg = 0; end = 1<<30; tid = -1; + for (i = 0; i < n; ++i) { + bam_header_t *htmp; + data[i] = calloc(1, sizeof(aux_t)); + data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM + htmp = bam_header_read(data[i]->fp); // read the BAM header + if (i == 0) { + h = htmp; // keep the header for the 1st BAM + if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region + } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header + if (tid >= 0) { // if a region is specified and parsed successfully + bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index + data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator + bam_index_destroy(idx); // the index is not needed any more; phase out of the memory + } + } + + // the core multi-pileup loop + mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization + n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM + plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads internal in mplp + while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position + if (pos < beg || pos >= end) continue; // out of range; skip + if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip + printf("%s\t%d", h->target_name[tid], pos+1); + for (i = 0; i < n; ++i) { + int j, m = 0; + const bam_pileup1_t *p = plp[i]; + for (j = 0; j < n_plp[i]; ++j) // this loop counts #reads having deletions of refskip at tid:pos + if (p->is_del || p->is_refskip) ++m; + printf("\t%d", n_plp[i] - m); + } + putchar('\n'); + } + free(n_plp); free(plp); + bam_mplp_destroy(mplp); + + bam_header_destroy(h); + for (i = 0; i < n; ++i) { + bam_close(data[i]->fp); + if (data[i]->iter) bam_iter_destroy(data[i]->iter); + free(data[i]); + } + free(data); free(reg); + if (bed) bed_destroy(bed); + return 0; +} diff --git a/bamtk.c b/bamtk.c index 5309d5e..5886570 100644 --- a/bamtk.c +++ b/bamtk.c @@ -26,6 +26,7 @@ int main_reheader(int argc, char *argv[]); int main_cut_target(int argc, char *argv[]); int main_phase(int argc, char *argv[]); int main_cat(int argc, char *argv[]); +int main_depth(int argc, char *argv[]); int faidx_main(int argc, char *argv[]); int glf3_view_main(int argc, char *argv[]); @@ -80,6 +81,7 @@ static int usage() fprintf(stderr, " sort sort alignment file\n"); fprintf(stderr, " pileup generate pileup output\n"); fprintf(stderr, " mpileup multi-way pileup\n"); + fprintf(stderr, " depth compute the depth\n"); fprintf(stderr, " faidx index/extract FASTA\n"); #if _CURSES_LIB != 0 fprintf(stderr, " tview text alignment viewer\n"); @@ -136,6 +138,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "cat") == 0) return main_cat(argc-1, argv+1); else if (strcmp(argv[1], "targetcut") == 0) return main_cut_target(argc-1, argv+1); else if (strcmp(argv[1], "phase") == 0) return main_phase(argc-1, argv+1); + else if (strcmp(argv[1], "depth") == 0) return main_depth(argc-1, argv+1); #if _CURSES_LIB != 0 else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1); #endif -- 2.39.2