X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_plcmd.c;h=7f13eee4ed51a987a6386796eb4eeae2bad65891;hb=4c8c9dfc1e3b3b066a62a703fd3ba04db6ad5a45;hp=a3e6aeb614927733676b2b25e4592786cb14ef2e;hpb=2d2be1edb0548e95b771bc9c79cff48eb5d2e1f5;p=samtools.git diff --git a/bam_plcmd.c b/bam_plcmd.c index a3e6aeb..7f13eee 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -532,10 +532,12 @@ int bam_pileup(int argc, char *argv[]) #define MPLP_REALN 0x80 #define MPLP_FMT_DP 0x100 #define MPLP_FMT_SP 0x200 +#define MPLP_NO_INDEL 0x400 typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth; - char *reg, *fn_pos; + int openQ, extQ, tandemQ; + char *reg, *fn_pos, *pl_list; faidx_t *fai; kh_64_t *hash; } mplp_conf_t; @@ -556,7 +558,7 @@ typedef struct { static int mplp_func(void *data, bam1_t *b) { extern int bam_realn(bam1_t *b, const char *ref); - extern int bam_prob_realn(bam1_t *b, const char *ref); + extern int bam_prob_realn_core(bam1_t *b, const char *ref, int); extern int bam_cap_mapQ(bam1_t *b, char *ref, int thres); mplp_aux_t *ma = (mplp_aux_t*)data; int ret, skip = 0; @@ -565,7 +567,7 @@ static int mplp_func(void *data, bam1_t *b) ret = ma->iter? bam_iter_read(ma->fp, ma->iter, b) : bam_read1(ma->fp, b); if (ret < 0) break; skip = 0; - if (has_ref && (ma->flag&MPLP_REALN)) bam_prob_realn(b, ma->ref); + if (has_ref && (ma->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, 1); if (has_ref && ma->capQ_thres > 10) { int q = bam_cap_mapQ(b, ma->ref, ma->capQ_thres); if (q < 0) skip = 1; @@ -602,6 +604,8 @@ static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf, static int mpileup(mplp_conf_t *conf, int n, char **fn) { + extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list); + extern void bcf_call_del_rghash(void *rghash); mplp_aux_t **data; int i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid, max_depth; const bam_pileup1_t **plp; @@ -609,6 +613,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bam_header_t *h = 0; char *ref; khash_t(64) *hash = 0; + void *rghash = 0; bcf_callaux_t *bca = 0; bcf_callret1_t *bcr = 0; @@ -638,6 +643,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r"); h_tmp = bam_header_read(data[i]->fp); bam_smpl_add(sm, fn[i], h_tmp->text); + rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list); if (conf->reg) { int beg, end; bam_index_t *idx; @@ -693,6 +699,8 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bcf_hdr_write(bp, bh); bca = bcf_call_init(-1., conf->min_baseQ); bcr = calloc(sm->n, sizeof(bcf_callret1_t)); + bca->rghash = rghash; + bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ; } ref_tid = -1; ref = 0; iter = bam_mplp_init(n, mplp_func, (void**)data); @@ -726,9 +734,37 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i); bcf_call_combine(gplp.n, bcr, ref16, &bc); - bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, (conf->flag&MPLP_FMT_SP)); + bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, + (conf->flag&MPLP_FMT_SP), 0, 0); bcf_write(bp, bh, b); bcf_destroy(b); + if (!(conf->flag&MPLP_NO_INDEL)) { + // call MNPs + if (bcf_call_mnp_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref) >= 0) { + for (i = 0; i < gplp.n; ++i) + bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], B2B_REF_MNP, bca, bcr + i); + if (bcf_call_combine(gplp.n, bcr, B2B_REF_MNP, &bc) >= 0) { + b = calloc(1, sizeof(bcf1_t)); + bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, + (conf->flag&MPLP_FMT_SP), bca, ref); + bcf_write(bp, bh, b); + bcf_destroy(b); + bca->last_mnp_pos = pos; + } + } + // call indels + if (bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) { + for (i = 0; i < gplp.n; ++i) + bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], B2B_REF_INDEL, bca, bcr + i); + if (bcf_call_combine(gplp.n, bcr, B2B_REF_INDEL, &bc) >= 0) { + b = calloc(1, sizeof(bcf1_t)); + bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0, + (conf->flag&MPLP_FMT_SP), bca, ref); + bcf_write(bp, bh, b); + bcf_destroy(b); + } + } + } } else { printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N'); for (i = 0; i < n; ++i) { @@ -755,6 +791,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) bam_smpl_destroy(sm); free(buf.s); for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]); free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp); + bcf_call_del_rghash(rghash); if (hash) { // free the hash table khint_t k; for (k = kh_begin(hash); k < kh_end(hash); ++k) @@ -834,8 +871,9 @@ int bam_mpileup(int argc, char *argv[]) mplp.min_baseQ = 13; mplp.capQ_thres = 0; mplp.max_depth = 250; + mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100; mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN; - while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDSd:b:")) >= 0) { + while ((c = getopt(argc, argv, "gf:r:l:M:q:Q:uaORC:BDSd:b:P:o:e:h:I")) >= 0) { switch (c) { case 'f': mplp.fai = fai_load(optarg); @@ -844,6 +882,7 @@ int bam_mpileup(int argc, char *argv[]) case 'd': mplp.max_depth = atoi(optarg); break; case 'r': mplp.reg = strdup(optarg); break; case 'l': mplp.fn_pos = strdup(optarg); break; + case 'P': mplp.pl_list = strdup(optarg); break; case 'g': mplp.flag |= MPLP_GLF; break; case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break; case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break; @@ -852,11 +891,15 @@ int bam_mpileup(int argc, char *argv[]) case 'R': mplp.flag |= MPLP_REALN; break; case 'D': mplp.flag |= MPLP_FMT_DP; break; case 'S': mplp.flag |= MPLP_FMT_SP; break; + case 'I': mplp.flag |= MPLP_NO_INDEL; break; case 'C': mplp.capQ_thres = atoi(optarg); break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; case 'Q': mplp.min_baseQ = atoi(optarg); break; case 'b': file_list = optarg; break; + case 'o': mplp.openQ = atoi(optarg); break; + case 'e': mplp.extQ = atoi(optarg); break; + case 'h': mplp.tandemQ = atoi(optarg); break; } } if (argc == 1) { @@ -870,11 +913,16 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ); fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -d INT max per-sample depth [%d]\n", mplp.max_depth); + fprintf(stderr, " -P STR comma separated list of platforms for indels [all]\n"); + fprintf(stderr, " -o INT Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ); + fprintf(stderr, " -e INT Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ); + fprintf(stderr, " -h INT coefficient for homopolyer errors [%d]\n", mplp.tandemQ); fprintf(stderr, " -g generate BCF output\n"); fprintf(stderr, " -u do not compress BCF output\n"); fprintf(stderr, " -B disable BAQ computation\n"); fprintf(stderr, " -D output per-sample DP\n"); fprintf(stderr, " -S output per-sample SP (strand bias P-value, slow)\n"); + fprintf(stderr, " -I do not perform indel calling\n"); fprintf(stderr, "\n"); fprintf(stderr, "Notes: Assuming diploid individuals.\n\n"); return 1; @@ -888,7 +936,7 @@ int bam_mpileup(int argc, char *argv[]) } else mpileup(&mplp, argc - optind, argv + optind); - free(mplp.reg); + free(mplp.reg); free(mplp.pl_list); if (mplp.fai) fai_destroy(mplp.fai); return 0; }