1 /* This program demonstrates how to generate pileup from multiple BAMs
2 * simutaneously, to achieve random access and to use the BED interface.
3 * To compile this program separately, you may:
5 * gcc -g -O2 -Wall -D_MAIN_BAM2DEPTH bam2depth.c -L. -lbam -lz
13 typedef struct { // auxiliary data structure
14 bamFile fp; // the file handler
15 bam_iter_t iter; // NULL if a region not specified
18 void *bed_read(const char *fn); // read a BED or position list file
19 void bed_destroy(void *_h); // destroy the BED data structure
20 int bed_overlap(const void *_h, const char *chr, int beg, int end); // test if chr:beg-end overlaps
22 // This function reads a BAM alignment from one BAM file.
23 static int read_bam(void *data, bam1_t *b)
25 aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure
26 return aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b);
29 #ifdef _MAIN_BAM2DEPTH
30 int main(int argc, char *argv[])
32 int main_depth(int argc, char *argv[])
35 int i, n, tid, beg, end, pos, *n_plp;
36 const bam_pileup1_t **plp;
37 char *reg = 0; // specified region
38 void *bed = 0; // BED data structure
39 bam_header_t *h = 0; // BAM header of the 1st input
43 // parse the command line
44 while ((n = getopt(argc, argv, "r:b:")) >= 0) {
46 case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header
47 case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now
51 fprintf(stderr, "Usage: bam2depth [-r reg] [-b in.bed] <in1.bam> [...]\n");
55 // initialize the auxiliary data structures
56 n = argc - optind; // the number of BAMs on the command line
57 data = calloc(n, sizeof(void*));
58 beg = 0; end = 1<<30; tid = -1;
59 for (i = 0; i < n; ++i) {
61 data[i] = calloc(1, sizeof(aux_t));
62 data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM
63 htmp = bam_header_read(data[i]->fp); // read the BAM header
65 h = htmp; // keep the header for the 1st BAM
66 if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region
67 } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header
68 if (tid >= 0) { // if a region is specified and parsed successfully
69 bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index
70 data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator
71 bam_index_destroy(idx); // the index is not needed any more; phase out of the memory
75 // the core multi-pileup loop
76 mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
77 n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
78 plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads internal in mplp
79 while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position
80 if (pos < beg || pos >= end) continue; // out of range; skip
81 if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
82 printf("%s\t%d", h->target_name[tid], pos+1);
83 for (i = 0; i < n; ++i) {
85 const bam_pileup1_t *p = plp[i];
86 for (j = 0; j < n_plp[i]; ++j) // this loop counts #reads having deletions or refskip at tid:pos
87 if (p->is_del || p->is_refskip) ++m;
88 printf("\t%d", n_plp[i] - m);
92 free(n_plp); free(plp);
93 bam_mplp_destroy(mplp);
95 bam_header_destroy(h);
96 for (i = 0; i < n; ++i) {
97 bam_close(data[i]->fp);
98 if (data[i]->iter) bam_iter_destroy(data[i]->iter);
101 free(data); free(reg);
102 if (bed) bed_destroy(bed);