CC= gcc
CFLAGS= -g -Wall -O2 #-m64 #-arch ppc
-DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE
+DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE #-D_BCF_QUAD
LOBJS= bcf.o vcf.o bcfutils.o prob1.o kfunc.o index.o fet.o
OMISC= ..
AOBJS= call1.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o
ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp);
ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
if (a.is_tested) {
- if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.2lg,%.2lg,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
+ if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.4lf,%.4lf,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]);
}
if (pr->g[0] >= 0. && p_hwe <= .2)
kputs(b->fmt, &s); kputc('\0', &s);
free(b->str);
b->m_str = s.m; b->l_str = s.l; b->str = s.s;
- b->qual = r < 1e-100? 99 : -3.434 * log(r);
+ b->qual = r < 1e-100? 99 : -4.343 * log(r);
if (b->qual > 99) b->qual = 99;
bcf_sync(n_smpl, b);
if (!is_var) bcf_shrink_alt(n_smpl, b, 1);
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
- vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.9;
+ vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5;
while ((c = getopt(argc, argv, "1:l:cHAGvLbSuP:t:p:")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
if (b->tid != tid || b->pos >= end) break;
if (!(l > begin && end > b->pos)) continue;
}
+ ++n_processed;
if (vc.flag & VC_CALL) {
bcf_p1rst_t pr;
bcf_gl2pl(h->n_smpl, b);
bcf_p1_cal(b, p1, &pr); // pr.g[3] is not calculated here
if (vc.flag&VC_HWE) bcf_p1_cal_g3(p1, pr.g);
- if ((n_processed + 1) % 50000 == 0) bcf_p1_dump_afs(p1);
+ if (n_processed % 100000 == 0) {
+ fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed);
+ bcf_p1_dump_afs(p1);
+ }
if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue;
update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag);
}
b->fmt[0] = '\0';
}
vcf_write(bout, h, b);
- ++n_processed;
}
if (vc.prior_file) free(vc.prior_file);
if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);
for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum;
for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]);
fputc('\n', stderr);
+ for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k];
+ fprintf(stderr, "[heterozygosity] %lf\n", (double)sum / ma->M);
return 0;
}
}
max = 1. - max;
if (max < 1e-308) max = 1e-308;
- q = (int)(-3.434 * log(max) + .499);
+ q = (int)(-4.343 * log(max) + .499);
if (q > 99) q = 99;
return q<<2|max_i;
}
sum_alt += (long double)ma->phi[k] * ma->z[k];
// printf("* %lg, %lg *\n", (double)sum, (double)sum_alt); // DEBUG: sum should equal sum_alt
sum = sum_alt;
- y = lgamma(2*n1 + 1) - lgamma(ma->M + 1);
- for (k = 1, x = 0.; k <= 2 * n2; ++k)
- x += ma->phi[k] * ma->z2[k] * exp(lgamma(ma->M - k + 1) - lgamma(2*n2 - k + 1) + y);
- pc[0] = ma->z1[0] * x / sum;
+ // the variant is specific to group2
+// printf("%lg %lg %lg %lg\n", ma->z[2*(n1+n2)]/exp(ma->t - (ma->t1 + ma->t2)), ma->z1[2*n1], ma->z2[2*n2], (double)sum);
y = lgamma(2*n2 + 1) - lgamma(ma->M + 1);
+ for (k = 0, x = 0.; k < 2 * n2; ++k)
+ x += ma->phi[2*n1+k] * ma->z2[k] * expl(lgamma(2*n1 + k + 1) - lgamma(k + 1) + y);
+ pc[1] = ma->z1[2*n1] * x / sum;
+ for (k = 1, x = 0.; k <= 2 * n2; ++k)
+ x += ma->phi[k] * ma->z2[k] * expl(lgamma(ma->M - k + 1) - lgamma(2*n2 - k + 1) + y);
+ pc[1] += ma->z1[0] * x / sum;
+ // the variant is specific to group1
+ y = lgamma(2*n1 + 1) - lgamma(ma->M + 1);
+ for (k = 0, x = 0.; k < 2 * n1; ++k)
+ x += ma->phi[2*n2+k] * ma->z1[k] * expl(lgamma(2*n2 + k + 1) - lgamma(k + 1) + y);
+ pc[0] = ma->z2[2*n2] * x / sum;
for (k = 1, x = 0.; k <= 2 * n1; ++k)
- x += ma->phi[k] * ma->z1[k] * exp(lgamma(ma->M - k + 1) - lgamma(2*n1 - k + 1) + y);
- pc[1] = ma->z2[0] * x / sum;
- for (k = 0; k < 4; ++k) {
+ x += ma->phi[k] * ma->z1[k] * expl(lgamma(ma->M - k + 1) - lgamma(2*n1 - k + 1) + y);
+ pc[0] += ma->z2[0] * x / sum;
+ // rescale
+ for (k = 2; k < 4; ++k) {
y = 1. - pc[k];
if (y <= 0.) y = 1e-100;
- pc[k] = (int)(-3.434 * log(y) + .499);
+ pc[k] = (int)(-4.343 * log(y) + .499);
if (pc[k] > 99.) pc[k] = 99.;
}
}
my @t = split;
next if (length($t[3]) != 1 || uc($t[3]) eq 'N');
my @s = split(',', $t[4]);
+ $t[5] = 3 if ($t[5] < 0);
$t[3] = uc($t[3]); $t[4] = uc($t[4]);
next if (length($s[0]) != 1);
push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $h{$t[0],$t[1]}? 1 : 0]);
my @x;
$x[0] = sprintf("%.3f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100);
$x[1] = sprintf("%.3f", $hsize? $c[3] / $hsize : 0);
- $x[2] = sprintf("%.3f", $c[3] / $c[0]);
+ $x[2] = sprintf("%.3f", $c[3] / $c[1]);
print join("\t", $last, @c, @x), "\n";
$next = $c[0]/@a + $opts{s};
}