* fixed a bug in mpileup -u
#define CALL_DEFTHETA 0.85f
struct __bcf_callaux_t {
#define CALL_DEFTHETA 0.85f
struct __bcf_callaux_t {
+ int max_info, capQ, min_baseQ;
double *fk;
uint32_t *info;
};
double *fk;
uint32_t *info;
};
-bcf_callaux_t *bcf_call_init(double theta)
+bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
{
bcf_callaux_t *bca;
int n;
if (theta <= 0.) theta = CALL_DEFTHETA;
bca = calloc(1, sizeof(bcf_callaux_t));
bca->capQ = 60;
{
bcf_callaux_t *bca;
int n;
if (theta <= 0.) theta = CALL_DEFTHETA;
bca = calloc(1, sizeof(bcf_callaux_t));
bca->capQ = 60;
+ bca->min_baseQ = min_baseQ;
bca->fk = calloc(CALL_MAX, sizeof(double));
bca->fk[0] = 1.;
for (n = 1; n < CALL_MAX; ++n)
bca->fk = calloc(CALL_MAX, sizeof(double));
bca->fk[0] = 1.;
for (n = 1; n < CALL_MAX; ++n)
uint32_t q, x = 0, qq;
if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue; // skip unmapped reads and deleted bases
q = (uint32_t)bam1_qual(p->b)[p->qpos]; // base quality
uint32_t q, x = 0, qq;
if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue; // skip unmapped reads and deleted bases
q = (uint32_t)bam1_qual(p->b)[p->qpos]; // base quality
+ if (q < bca->min_baseQ) continue;
x |= (uint32_t)bam1_strand(p->b) << 18 | q << 8 | p->b->core.qual;
if (p->b->core.qual < q) q = p->b->core.qual; // cap the overall quality at mapping quality
x |= q << 24;
x |= (uint32_t)bam1_strand(p->b) << 18 | q << 8 | p->b->core.qual;
if (p->b->core.qual < q) q = p->b->core.qual; // cap the overall quality at mapping quality
x |= q << 24;
p[j*5+k] = p[k*5+j] = 3.01 * (aux.c[j] + aux.c[k]) + tmp;
}
}
p[j*5+k] = p[k*5+j] = 3.01 * (aux.c[j] + aux.c[k]) + tmp;
}
}
-/*
- 1) Find the top 2 bases (from esum[4]).
-
- 2) If the reference base is among the top 2, consider the locus is
- potentially biallelic and set call->a[2] as -1; otherwise, the
- locus is potentially triallelic. If the reference is ambiguous,
- take the weakest call as the pseudo-reference.
- */
int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call)
{
int ref4, i, j;
int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call)
{
int ref4, i, j;
- bcf_callaux_t *bcf_call_init(double theta);
+ bcf_callaux_t *bcf_call_init(double theta, int min_baseQ);
void bcf_call_destroy(bcf_callaux_t *bca);
int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf_callaux_t *bca, bcf_callret1_t *r);
int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call);
void bcf_call_destroy(bcf_callaux_t *bca);
int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base /*4-bit*/, bcf_callaux_t *bca, bcf_callret1_t *r);
int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call);
#define MPLP_VAR 0x2
#define MPLP_AFALL 0x8
#define MPLP_GLF 0x10
#define MPLP_VAR 0x2
#define MPLP_AFALL 0x8
#define MPLP_GLF 0x10
+#define MPLP_NO_COMP 0x20
#define MPLP_AFS_BLOCK 0x10000
typedef struct {
#define MPLP_AFS_BLOCK 0x10000
typedef struct {
- int max_mq, min_mq, prior_type, flag;
+ int max_mq, min_mq, prior_type, flag, min_baseQ;
double theta;
char *reg, *fn_pos;
faidx_t *fai;
double theta;
char *reg, *fn_pos;
faidx_t *fai;
if (conf->flag & MPLP_GLF) {
kstring_t s;
s.l = s.m = 0; s.s = 0;
if (conf->flag & MPLP_GLF) {
kstring_t s;
s.l = s.m = 0; s.s = 0;
- bp = bcf_open("-", "w");
+ bp = bcf_open("-", (conf->flag&MPLP_NO_COMP)? "wu" : "w");
for (i = 0; i < h->n_targets; ++i) {
kputs(h->target_name[i], &s);
kputc('\0', &s);
for (i = 0; i < h->n_targets; ++i) {
kputs(h->target_name[i], &s);
kputc('\0', &s);
}
// mpileup
if (conf->flag & MPLP_GLF) {
}
// mpileup
if (conf->flag & MPLP_GLF) {
- bca = bcf_call_init(-1.);
+ bca = bcf_call_init(-1., conf->min_baseQ);
bcr = calloc(n, sizeof(bcf_callret1_t));
} else if (conf->flag & MPLP_VCF) {
ma = mc_init(n);
bcr = calloc(n, sizeof(bcf_callret1_t));
} else if (conf->flag & MPLP_VCF) {
ma = mc_init(n);
mplp.max_mq = 60;
mplp.prior_type = MC_PTYPE_FULL;
mplp.theta = 1e-3;
mplp.max_mq = 60;
mplp.prior_type = MC_PTYPE_FULL;
mplp.theta = 1e-3;
- while ((c = getopt(argc, argv, "gvVcFSP:f:r:l:VM:q:t:")) >= 0) {
+ mplp.min_baseQ = 13;
+ while ((c = getopt(argc, argv, "gvVcFSP:f:r:l:VM:q:t:Q:u")) >= 0) {
switch (c) {
case 't': mplp.theta = atof(optarg); break;
case 'P':
switch (c) {
case 't': mplp.theta = atof(optarg); break;
case 'P':
case 'c': mplp.flag |= MPLP_VCF; break;
case 'F': mplp.flag |= MPLP_AFALL; break;
case 'v': mplp.flag |= MPLP_VAR; break;
case 'c': mplp.flag |= MPLP_VCF; break;
case 'F': mplp.flag |= MPLP_AFALL; break;
case 'v': mplp.flag |= MPLP_VAR; break;
+ case 'u': mplp.flag |= MPLP_NO_COMP; break;
case 'M': mplp.max_mq = atoi(optarg); break;
case 'q': mplp.min_mq = atoi(optarg); break;
case 'M': mplp.max_mq = atoi(optarg); break;
case 'q': mplp.min_mq = atoi(optarg); break;
+ case 'Q': mplp.min_baseQ = atoi(optarg); break;
}
}
if (mplp.flag&MPLP_GLF) mplp.flag &= ~MPLP_VCF;
}
}
if (mplp.flag&MPLP_GLF) mplp.flag &= ~MPLP_VCF;
fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
fprintf(stderr, " -t FLOAT scaled mutation rate [%lg]\n", mplp.theta);
fprintf(stderr, " -P STR prior: full, flat, cond2 [full]\n");
fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
fprintf(stderr, " -t FLOAT scaled mutation rate [%lg]\n", mplp.theta);
fprintf(stderr, " -P STR prior: full, flat, cond2 [full]\n");
+ fprintf(stderr, " -Q INT min base quality [%d]\n", mplp.min_baseQ);
fprintf(stderr, " -c generate VCF output (consensus calling)\n");
fprintf(stderr, " -g generate GLF output\n");
fprintf(stderr, " -v show variant sites only\n");
fprintf(stderr, " -c generate VCF output (consensus calling)\n");
fprintf(stderr, " -g generate GLF output\n");
fprintf(stderr, " -v show variant sites only\n");
bcf_t *b;
b = calloc(1, sizeof(bcf_t));
if (strchr(mode, 'w')) {
bcf_t *b;
b = calloc(1, sizeof(bcf_t));
if (strchr(mode, 'w')) {
- b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdout), "w");
+ b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdout), mode);
- b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), "r");
+ b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), mode);
}
b->fp->owned_file = 1;
if (strchr(mode, 'r')) bcf_hdr_read(b);
}
b->fp->owned_file = 1;
if (strchr(mode, 'r')) bcf_hdr_read(b);