From c0490a669e9f1f9257e4b02a263e92e842bf5357 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 4 Aug 2010 02:35:27 +0000 Subject: [PATCH] * allow to set min base q * fixed a bug in mpileup -u --- bam2bcf.c | 16 +++++----------- bam2bcf.h | 2 +- bam_plcmd.c | 13 +++++++++---- bcftools/bcf.c | 4 ++-- 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/bam2bcf.c b/bam2bcf.c index d69c771..e64e335 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -12,18 +12,19 @@ extern void ks_introsort_uint32_t(size_t n, uint32_t a[]); #define CALL_DEFTHETA 0.85f struct __bcf_callaux_t { - int max_info, capQ; + int max_info, capQ, min_baseQ; double *fk; uint32_t *info; }; -bcf_callaux_t *bcf_call_init(double theta) +bcf_callaux_t *bcf_call_init(double theta, int min_baseQ) { bcf_callaux_t *bca; int n; if (theta <= 0.) theta = CALL_DEFTHETA; bca = calloc(1, sizeof(bcf_callaux_t)); bca->capQ = 60; + bca->min_baseQ = min_baseQ; bca->fk = calloc(CALL_MAX, sizeof(double)); bca->fk[0] = 1.; for (n = 1; n < CALL_MAX; ++n) @@ -89,6 +90,7 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf uint32_t q, x = 0, qq; if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue; // skip unmapped reads and deleted bases q = (uint32_t)bam1_qual(p->b)[p->qpos]; // base quality + if (q < bca->min_baseQ) continue; x |= (uint32_t)bam1_strand(p->b) << 18 | q << 8 | p->b->core.qual; if (p->b->core.qual < q) q = p->b->core.qual; // cap the overall quality at mapping quality x |= q << 24; @@ -137,17 +139,9 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf p[j*5+k] = p[k*5+j] = 3.01 * (aux.c[j] + aux.c[k]) + tmp; } } - return 0; + return r->depth; } -/* - 1) Find the top 2 bases (from esum[4]). - - 2) If the reference base is among the top 2, consider the locus is - potentially biallelic and set call->a[2] as -1; otherwise, the - locus is potentially triallelic. If the reference is ambiguous, - take the weakest call as the pseudo-reference. - */ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call) { int ref4, i, j; diff --git a/bam2bcf.h b/bam2bcf.h index 896108a..655174f 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -24,7 +24,7 @@ typedef struct { extern "C" { #endif - bcf_callaux_t *bcf_call_init(double theta); + bcf_callaux_t *bcf_call_init(double theta, int min_baseQ); void bcf_call_destroy(bcf_callaux_t *bca); int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf_callaux_t *bca, bcf_callret1_t *r); int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call); diff --git a/bam_plcmd.c b/bam_plcmd.c index 8affabf..2c3178c 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -454,11 +454,12 @@ int bam_pileup(int argc, char *argv[]) #define MPLP_VAR 0x2 #define MPLP_AFALL 0x8 #define MPLP_GLF 0x10 +#define MPLP_NO_COMP 0x20 #define MPLP_AFS_BLOCK 0x10000 typedef struct { - int max_mq, min_mq, prior_type, flag; + int max_mq, min_mq, prior_type, flag, min_baseQ; double theta; char *reg, *fn_pos; faidx_t *fai; @@ -538,7 +539,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) if (conf->flag & MPLP_GLF) { kstring_t s; s.l = s.m = 0; s.s = 0; - bp = bcf_open("-", "w"); + bp = bcf_open("-", (conf->flag&MPLP_NO_COMP)? "wu" : "w"); for (i = 0; i < h->n_targets; ++i) { kputs(h->target_name[i], &s); kputc('\0', &s); @@ -583,7 +584,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) } // mpileup if (conf->flag & MPLP_GLF) { - bca = bcf_call_init(-1.); + bca = bcf_call_init(-1., conf->min_baseQ); bcr = calloc(n, sizeof(bcf_callret1_t)); } else if (conf->flag & MPLP_VCF) { ma = mc_init(n); @@ -716,7 +717,8 @@ int bam_mpileup(int argc, char *argv[]) mplp.max_mq = 60; mplp.prior_type = MC_PTYPE_FULL; mplp.theta = 1e-3; - while ((c = getopt(argc, argv, "gvVcFSP:f:r:l:VM:q:t:")) >= 0) { + mplp.min_baseQ = 13; + while ((c = getopt(argc, argv, "gvVcFSP:f:r:l:VM:q:t:Q:u")) >= 0) { switch (c) { case 't': mplp.theta = atof(optarg); break; case 'P': @@ -739,8 +741,10 @@ int bam_mpileup(int argc, char *argv[]) case 'c': mplp.flag |= MPLP_VCF; break; case 'F': mplp.flag |= MPLP_AFALL; break; case 'v': mplp.flag |= MPLP_VAR; break; + case 'u': mplp.flag |= MPLP_NO_COMP; break; case 'M': mplp.max_mq = atoi(optarg); break; case 'q': mplp.min_mq = atoi(optarg); break; + case 'Q': mplp.min_baseQ = atoi(optarg); break; } } if (mplp.flag&MPLP_GLF) mplp.flag &= ~MPLP_VCF; @@ -754,6 +758,7 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -t FLOAT scaled mutation rate [%lg]\n", mplp.theta); fprintf(stderr, " -P STR prior: full, flat, cond2 [full]\n"); + fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ); fprintf(stderr, " -c generate VCF output (consensus calling)\n"); fprintf(stderr, " -g generate GLF output\n"); fprintf(stderr, " -v show variant sites only\n"); diff --git a/bcftools/bcf.c b/bcftools/bcf.c index f8b55c2..e406319 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -17,9 +17,9 @@ bcf_t *bcf_open(const char *fn, const char *mode) bcf_t *b; b = calloc(1, sizeof(bcf_t)); if (strchr(mode, 'w')) { - b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdout), "w"); + b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdout), mode); } else { - b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), "r"); + b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), mode); } b->fp->owned_file = 1; if (strchr(mode, 'r')) bcf_hdr_read(b); -- 2.39.2