#define VC_CALL_GT 2048
#define VC_ADJLD 4096
#define VC_NO_INDEL 8192
-#define VC_FOLDED 16384
typedef struct {
int flag, prior_type, n1;
{
kstring_t s;
int is_var = (pr->p_ref < pref);
- double p_hwe, r = is_var? pr->p_ref : 1. - pr->p_ref;
+ double p_hwe, r = is_var? pr->p_ref : pr->p_var, fq;
anno16_t a;
p_hwe = pr->g[0] >= 0.? test_hwe(pr->g) : 1.0; // only do HWE g[] is calculated
// ksprintf(&s, "AF1=%.4lg;AFE=%.4lg;CI95=%.4lg,%.4lg", 1.-pr->f_em, 1.-pr->f_exp, pr->cil, pr->cih);
ksprintf(&s, "AF1=%.4lg;CI95=%.4lg,%.4lg", 1.-pr->f_em, pr->cil, pr->cih);
ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
+ fq = pr->p_ref_folded < 0.5? -4.343 * log(pr->p_ref_folded) : 4.343 * log(pr->p_var_folded);
+ if (fq < -999) fq = -999;
+ if (fq > 999) fq = 999;
+ ksprintf(&s, ";FQ=%.3lg", fq);
if (a.is_tested) {
if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%lg,%lg,%lg,%lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]);
kputs(b->fmt, &s); kputc('\0', &s);
free(b->str);
b->m_str = s.m; b->l_str = s.l; b->str = s.s;
- b->qual = r < 1e-100? 99 : -4.343 * log(r);
- if (b->qual > 99) b->qual = 99;
+ b->qual = r < 1e-100? 999 : -4.343 * log(r);
+ if (b->qual > 999) b->qual = 999;
bcf_sync(b);
if (!is_var) bcf_shrink_alt(b, 1);
else if (!(flag&VC_KEEPALT))
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.;
- while ((c = getopt(argc, argv, "fN1:l:cHAGvbSuP:t:p:QgLi:I")) >= 0) {
+ while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:I")) >= 0) {
switch (c) {
- case 'f': vc.flag |= VC_FOLDED; break;
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.fn_list = strdup(optarg); break;
case 'N': vc.flag |= VC_ACGT_ONLY; break;
fprintf(stderr, " -Q output the QCALL likelihood format\n");
fprintf(stderr, " -L calculate LD for adjacent sites\n");
fprintf(stderr, " -I skip indels\n");
- fprintf(stderr, " -f reference-free variant calling\n");
fprintf(stderr, " -1 INT number of group-1 samples [0]\n");
fprintf(stderr, " -l FILE list of sites to output [all sites]\n");
fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4lg]\n", vc.theta);
bcf_p1_init_subprior(p1, vc.prior_type, vc.theta);
}
if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac); // otherwise use the default indel_frac
- if (vc.flag & VC_FOLDED) bcf_p1_set_folded(p1);
}
if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h);
if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) {
};
struct __bcf_p1aux_t {
- int n, M, n1, is_indel, is_folded;
+ int n, M, n1, is_indel;
double *q2p, *pdg; // pdg -> P(D|g)
double *phi, *phi_indel;
double *z, *zswap; // aux for afs
int PL_len;
};
-static void fold_array(int M, double *x)
-{
- int k;
- for (k = 0; k < M/2; ++k)
- x[k] = x[M-k] = (x[k] + x[M-k]) / 2.;
-}
-
void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x)
{
int i;
return 0;
}
-void bcf_p1_set_folded(bcf_p1aux_t *p1a)
-{
- if (p1a->n1 < 0) {
- p1a->is_folded = 1;
- fold_array(p1a->M, p1a->phi);
- fold_array(p1a->M, p1a->phi_indel);
- }
-}
-
void bcf_p1_destroy(bcf_p1aux_t *ma)
{
if (ma) {
pc[1] = pc[1] == 1.? 99 : (int)(-4.343 * log(1. - pc[1]) + .499);
}
-static double mc_cal_afs(bcf_p1aux_t *ma)
+static double mc_cal_afs(bcf_p1aux_t *ma, double *p_ref_folded, double *p_var_folded)
{
int k;
- long double sum = 0.;
+ long double sum = 0., sum2;
double *phi = ma->is_indel? ma->phi_indel : ma->phi;
memset(ma->afs1, 0, sizeof(double) * (ma->M + 1));
mc_cal_y(ma);
+ // compute AFS
for (k = 0, sum = 0.; k <= ma->M; ++k)
sum += (long double)phi[k] * ma->z[k];
for (k = 0; k <= ma->M; ++k) {
ma->afs1[k] = phi[k] * ma->z[k] / sum;
if (isnan(ma->afs1[k]) || isinf(ma->afs1[k])) return -1.;
}
+ // compute folded variant probability
+ for (k = 0, sum = 0.; k <= ma->M; ++k)
+ sum += (long double)(phi[k] + phi[ma->M - k]) / 2. * ma->z[k];
+ for (k = 1, sum2 = 0.; k < ma->M; ++k)
+ sum2 += (long double)(phi[k] + phi[ma->M - k]) / 2. * ma->z[k];
+ *p_var_folded = sum2 / sum;
+ *p_ref_folded = (phi[k] + phi[ma->M - k]) / 2. * (ma->z[ma->M] + ma->z[0]) / sum;
+ // the expected frequency
for (k = 0, sum = 0.; k <= ma->M; ++k) {
ma->afs[k] += ma->afs1[k];
sum += k * ma->afs1[k];
if (b->n_alleles < 2) return -1; // FIXME: find a better solution
//
rst->rank0 = cal_pdg(b, ma);
- rst->f_exp = mc_cal_afs(ma);
- rst->p_ref = ma->is_folded? ma->afs1[ma->M] + ma->afs1[0] : ma->afs1[ma->M];
+ rst->f_exp = mc_cal_afs(ma, &rst->p_ref_folded, &rst->p_var_folded);
+ rst->p_ref = ma->afs1[ma->M];
+ for (k = 0, sum = 0.; k < ma->M; ++k)
+ sum += ma->afs1[k];
+ rst->p_var = (double)sum;
// calculate f_flat and f_em
for (k = 0, sum = 0.; k <= ma->M; ++k)
sum += (long double)ma->z[k];
void bcf_p1_dump_afs(bcf_p1aux_t *ma)
{
int k;
- if (ma->is_folded) fold_array(ma->M, ma->afs);
fprintf(stderr, "[afs]");
for (k = 0; k <= ma->M; ++k)
fprintf(stderr, " %d:%.3lf", k, ma->afs[ma->M - k]);