bcf_sync(b);
}
-static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[9])
+static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[10])
{
kstring_t s;
int has_I16, is_var;
if (em[4] >= 0 && em[4] <= 0.05) ksprintf(&s, ";G3=%.4g,%.4g,%.4g;HWE=%.3g", em[3], em[2], em[1], em[4]);
if (em[5] >= 0 && em[6] >= 0) ksprintf(&s, ";AF2=%.4g,%.4g", 1 - em[5], 1 - em[6]);
if (em[7] >= 0) ksprintf(&s, ";LRT=%.3g", em[7]);
+ if (em[8] >= 0) ksprintf(&s, ";LRT2=%.3g", em[8]);
+ //if (em[9] >= 0) ksprintf(&s, ";LRT1=%.3g", em[9]);
}
if (pr == 0) { // if pr is unset, return
kputc('\0', &s); kputs(b->fmt, &s); kputc('\0', &s);
is_var = (pr->p_ref < pref);
r = is_var? pr->p_ref : pr->p_var;
- ksprintf(&s, ";CI95=%.4g,%.4g", pr->cil, pr->cih); // FIXME: when EM is not used, ";" should be omitted!
+// ksprintf(&s, ";CI95=%.4g,%.4g", pr->cil, pr->cih); // FIXME: when EM is not used, ";" should be omitted!
+ ksprintf(&s, ";AC1=%d", pr->ac);
if (has_I16) ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
fq = pr->p_ref_folded < 0.5? -4.343 * log(pr->p_ref_folded) : 4.343 * log(pr->p_var_folded);
if (fq < -999) fq = -999;
if (q[i] > 255) q[i] = 255;
}
if (pr->perm_rank >= 0) ksprintf(&s, ";PR=%d", pr->perm_rank);
+ // ksprintf(&s, ";LRT3=%.3g", pr->lrt);
ksprintf(&s, ";PCHI2=%.3g;PC2=%d,%d", q[1], q[2], pr->p_chi2);
}
if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]);
if (!strstr(str.s, "##INFO=<ID=FQ,"))
kputs("##INFO=<ID=FQ,Number=1,Type=Float,Description=\"Phred probability of all samples being the same\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=AF1,"))
- kputs("##INFO=<ID=AF1,Number=1,Type=Float,Description=\"Max-likelihood estimate of the site allele frequency of the first ALT allele\">\n", &str);
+ kputs("##INFO=<ID=AF1,Number=1,Type=Float,Description=\"Max-likelihood estimate of the first ALT allele frequency (assuming HWE)\">\n", &str);
+ if (!strstr(str.s, "##INFO=<ID=AC1,"))
+ kputs("##INFO=<ID=AC1,Number=1,Type=Float,Description=\"Max-likelihood estimate of the first ALT allele count (no HWE assumption)\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=G3,"))
kputs("##INFO=<ID=G3,Number=3,Type=Float,Description=\"ML estimate of genotype frequencies\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=HWE,"))
kputs("##INFO=<ID=HWE,Number=1,Type=Float,Description=\"Chi^2 based HWE test P-value based on G3\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=CI95,"))
- kputs("##INFO=<ID=CI95,Number=2,Type=Float,Description=\"Equal-tail Bayesian credible interval of the site allele frequency at the 95% level\">\n", &str);
+// if (!strstr(str.s, "##INFO=<ID=CI95,"))
+// kputs("##INFO=<ID=CI95,Number=2,Type=Float,Description=\"Equal-tail Bayesian credible interval of the site allele frequency at the 95% level\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=PV4,"))
kputs("##INFO=<ID=PV4,Number=4,Type=Float,Description=\"P-values for strand bias, baseQ bias, mapQ bias and tail distance bias\">\n", &str);
if (!strstr(str.s, "##INFO=<ID=INDEL,"))
}
while (vcf_read(bp, hin, b) > 0) {
int is_indel;
- double em[9];
+ double em[10];
if ((vc.flag & VC_VARONLY) && strcmp(b->alt, "X") == 0) continue;
if ((vc.flag & VC_VARONLY) && vc.min_smpl_frac > 0.) {
extern int bcf_smpl_covered(const bcf1_t *b);
bcf_2qcall(hout, b);
continue;
}
- if (vc.flag & VC_EM) bcf_em1(b, vc.n1, 0xff, em);
+ if (vc.flag & (VC_CALL|VC_ADJLD|VC_EM)) bcf_gl2pl(b);
+ if (vc.flag & VC_EM) bcf_em1(b, vc.n1, 0x1ff, em);
else {
int i;
for (i = 0; i < 9; ++i) em[i] = -1.;
}
- if (vc.flag & (VC_CALL|VC_ADJLD)) bcf_gl2pl(b);
if (vc.flag & VC_CALL) { // call variants
bcf_p1rst_t pr;
int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr);
int i, n = 0;
for (i = 0; i < vc.n_perm; ++i) {
#ifdef BCF_PERM_LRT // LRT based permutation is much faster but less robust to artifacts
- double x[9];
+ double x[10];
bcf_shuffle(b, seeds[i]);
bcf_em1(b, vc.n1, 1<<7, x);
if (x[7] < em[7]) ++n;