X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_plcmd.c;h=84838dd98e1d56c7270ae78225a8023e9c96ca57;hb=efa4a1055d7691e3cd1368dcafa210c2691a5fd2;hp=92023b87f92e4c293c7bb4409cac63373668c017;hpb=aee93c83c1c7ff30c2c959a14877cf8a4e2d8f92;p=samtools.git diff --git a/bam_plcmd.c b/bam_plcmd.c index 92023b8..84838dd 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -163,18 +163,18 @@ static void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char if (p->is_head) printf("^%c", p->b->core.qual > 93? 126 : p->b->core.qual + 33); if (!p->is_del) { int j, rb, c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; - rb = (ref && (int)pos < ref_len)? ref[pos] : 'N'; + rb = (ref && pos < ref_len)? ref[pos] : 'N'; if (c == '=' || toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.'; else c = bam1_strand(p->b)? tolower(c) : toupper(c); putchar(c); if (p->indel > 0) { - putchar('+'); putw(p->indel, stdout); + printf("+%d", p->indel); for (j = 1; j <= p->indel; ++j) { c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)]; putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); } } else if (p->indel < 0) { - putw(p->indel, stdout); + printf("%d", p->indel); for (j = 1; j <= -p->indel; ++j) { c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N'; putchar(bam1_strand(p->b)? tolower(c) : toupper(c)); @@ -341,10 +341,11 @@ int bam_pileup(int argc, char *argv[]) d->max_depth = 0; d->tid = -1; d->mask = BAM_DEF_MASK; d->c = bam_maqcns_init(); + d->c->is_soap = 1; // change the default model d->ido = bam_maqindel_opt_init(); while ((c = getopt(argc, argv, "st:f:cT:N:r:l:d:im:gI:G:vM:S2aR:P")) >= 0) { switch (c) { - case 'a': d->c->is_soap = 1; break; + case 'a': d->c->is_soap = 0; break; case 's': d->format |= BAM_PLF_SIMPLE; break; case 't': fn_list = strdup(optarg); break; case 'l': fn_pos = strdup(optarg); break; @@ -379,7 +380,7 @@ int bam_pileup(int argc, char *argv[]) fprintf(stderr, "Usage: samtools pileup [options] |\n\n"); fprintf(stderr, "Option: -s simple (yet incomplete) pileup format\n"); fprintf(stderr, " -S the input is in SAM\n"); - fprintf(stderr, " -a use the SOAPsnp model for SNP calling\n"); + fprintf(stderr, " -a use the MAQ model for SNP calling\n"); fprintf(stderr, " -2 output the 2nd best call and quality\n"); fprintf(stderr, " -i only show lines/consensus with indels\n"); fprintf(stderr, " -m INT filtering reads with bits in INT [%d]\n", d->mask); @@ -446,36 +447,81 @@ int bam_pileup(int argc, char *argv[]) * mpileup * ***********/ -static int mpileup(int n, char **fn) +typedef struct { + char *reg; + faidx_t *fai; +} mplp_conf_t; + +typedef struct { + bamFile fp; + bam_iter_t iter; +} mplp_aux_t; + +static int mplp_func(void *data, bam1_t *b) { - bamFile *fp; - int i, tid, pos, *n_plp; + mplp_aux_t *ma = (mplp_aux_t*)data; + if (ma->iter) return bam_iter_read(ma->fp, ma->iter, b); + return bam_read1(ma->fp, b); +} + +static int mpileup(mplp_conf_t *conf, int n, char **fn) +{ + mplp_aux_t **data; + int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid; const bam_pileup1_t **plp; bam_mplp_t iter; bam_header_t *h = 0; - fp = calloc(n, sizeof(void*)); + char *ref; + // allocate + data = calloc(n, sizeof(void*)); plp = calloc(n, sizeof(void*)); n_plp = calloc(n, sizeof(int*)); + // read the header and initialize data for (i = 0; i < n; ++i) { bam_header_t *h_tmp; - fp[i] = bam_open(fn[i], "r"); - h_tmp = bam_header_read(fp[i]); + data[i] = calloc(1, sizeof(mplp_aux_t)); + data[i]->fp = bam_open(fn[i], "r"); + h_tmp = bam_header_read(data[i]->fp); + if (conf->reg) { + int beg, end; + bam_index_t *idx; + idx = bam_index_load(fn[i]); + if (idx == 0) { + fprintf(stderr, "[%s] fail to load index for %d-th input.\n", __func__, i+1); + exit(1); + } + if (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) { + fprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\n", __func__, i+1); + exit(1); + } + if (i == 0) beg0 = beg, end0 = end; + data[i]->iter = bam_iter_query(idx, tid, beg, end); + bam_index_destroy(idx); + } if (i == 0) h = h_tmp; else { // FIXME: to check consistency bam_header_destroy(h_tmp); } } - iter = bam_mplp_init(n, bam_read1, fp); + // mpileup + ref_tid = -1; ref = 0; + iter = bam_mplp_init(n, mplp_func, (void**)data); while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) { - printf("%s\t%d\tN", h->target_name[tid], pos + 1); + if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested + if (tid != ref_tid) { + free(ref); + if (conf->fai) ref = fai_fetch(conf->fai, h->target_name[tid], &ref_len); + ref_tid = tid; + } + printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N'); for (i = 0; i < n; ++i) { int j; printf("\t%d\t", n_plp[i]); if (n_plp[i] == 0) printf("*\t*"); else { for (j = 0; j < n_plp[i]; ++j) - pileup_seq(plp[i] + j, pos, 0, 0); + pileup_seq(plp[i] + j, pos, ref_len, ref); putchar('\t'); for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; @@ -489,17 +535,35 @@ static int mpileup(int n, char **fn) } bam_mplp_destroy(iter); bam_header_destroy(h); - for (i = 0; i < n; ++i) bam_close(fp[i]); - free(fp); free(plp); + for (i = 0; i < n; ++i) { + bam_close(data[i]->fp); + if (data[i]->iter) bam_iter_destroy(data[i]->iter); + free(data[i]); + } + free(data); free(plp); free(ref); free(n_plp); return 0; } int bam_mpileup(int argc, char *argv[]) { + int c; + mplp_conf_t mplp; + memset(&mplp, 0, sizeof(mplp_conf_t)); + while ((c = getopt(argc, argv, "f:r:")) >= 0) { + switch (c) { + case 'f': + mplp.fai = fai_load(optarg); + if (mplp.fai == 0) return 1; + break; + case 'r': mplp.reg = strdup(optarg); + } + } if (argc == 1) { - fprintf(stderr, "Usage: samtools mpileup [ [...]]\n"); + fprintf(stderr, "Usage: samtools mpileup [-r reg] [-f in.fa] in1.bam [in2.bam [...]]\n"); return 1; } - mpileup(argc - 1, argv + 1); + mpileup(&mplp, argc - optind, argv + optind); + free(mplp.reg); + if (mplp.fai) fai_destroy(mplp.fai); return 0; }