From 955605d6cf50290e32ff923da397021361ceb596 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 3 Sep 2010 04:30:48 +0000 Subject: [PATCH] * changed 3.434 to 4.343 (typo!) * fixed a bug in the contrast caller * calculate heterozygosity --- bcftools/Makefile | 2 +- bcftools/call1.c | 13 ++++++++----- bcftools/prob1.c | 30 +++++++++++++++++++++--------- bcftools/vcfutils.pl | 3 ++- 4 files changed, 32 insertions(+), 16 deletions(-) diff --git a/bcftools/Makefile b/bcftools/Makefile index 5a3dc76..e985bc9 100644 --- a/bcftools/Makefile +++ b/bcftools/Makefile @@ -1,6 +1,6 @@ CC= gcc CFLAGS= -g -Wall -O2 #-m64 #-arch ppc -DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE #-D_BCF_QUAD LOBJS= bcf.o vcf.o bcfutils.o prob1.o kfunc.o index.o fet.o OMISC= .. AOBJS= call1.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o diff --git a/bcftools/call1.c b/bcftools/call1.c index 995d500..c5d466e 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -161,7 +161,7 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp); ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); if (a.is_tested) { - if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.2lg,%.2lg,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]); + if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.4lf,%.4lf,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]); ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]); } if (pr->g[0] >= 0. && p_hwe <= .2) @@ -170,7 +170,7 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; - b->qual = r < 1e-100? 99 : -3.434 * log(r); + b->qual = r < 1e-100? 99 : -4.343 * log(r); if (b->qual > 99) b->qual = 99; bcf_sync(n_smpl, b); if (!is_var) bcf_shrink_alt(n_smpl, b, 1); @@ -194,7 +194,7 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); - vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.9; + vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; while ((c = getopt(argc, argv, "1:l:cHAGvLbSuP:t:p:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; @@ -285,12 +285,16 @@ int bcfview(int argc, char *argv[]) if (b->tid != tid || b->pos >= end) break; if (!(l > begin && end > b->pos)) continue; } + ++n_processed; if (vc.flag & VC_CALL) { bcf_p1rst_t pr; bcf_gl2pl(h->n_smpl, b); bcf_p1_cal(b, p1, &pr); // pr.g[3] is not calculated here if (vc.flag&VC_HWE) bcf_p1_cal_g3(p1, pr.g); - if ((n_processed + 1) % 50000 == 0) bcf_p1_dump_afs(p1); + if (n_processed % 100000 == 0) { + fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); + bcf_p1_dump_afs(p1); + } if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue; update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag); } @@ -299,7 +303,6 @@ int bcfview(int argc, char *argv[]) b->fmt[0] = '\0'; } vcf_write(bout, h, b); - ++n_processed; } if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 1d0328f..37dc8ed 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -97,6 +97,8 @@ int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn) for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum; for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]); fputc('\n', stderr); + for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k]; + fprintf(stderr, "[heterozygosity] %lf\n", (double)sum / ma->M); return 0; } @@ -212,7 +214,7 @@ int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k) } max = 1. - max; if (max < 1e-308) max = 1e-308; - q = (int)(-3.434 * log(max) + .499); + q = (int)(-4.343 * log(max) + .499); if (q > 99) q = 99; return q<<2|max_i; } @@ -326,18 +328,28 @@ static void contrast(bcf_p1aux_t *ma, double pc[4]) // mc_cal_y() must be called sum_alt += (long double)ma->phi[k] * ma->z[k]; // printf("* %lg, %lg *\n", (double)sum, (double)sum_alt); // DEBUG: sum should equal sum_alt sum = sum_alt; - y = lgamma(2*n1 + 1) - lgamma(ma->M + 1); - for (k = 1, x = 0.; k <= 2 * n2; ++k) - x += ma->phi[k] * ma->z2[k] * exp(lgamma(ma->M - k + 1) - lgamma(2*n2 - k + 1) + y); - pc[0] = ma->z1[0] * x / sum; + // the variant is specific to group2 +// printf("%lg %lg %lg %lg\n", ma->z[2*(n1+n2)]/exp(ma->t - (ma->t1 + ma->t2)), ma->z1[2*n1], ma->z2[2*n2], (double)sum); y = lgamma(2*n2 + 1) - lgamma(ma->M + 1); + for (k = 0, x = 0.; k < 2 * n2; ++k) + x += ma->phi[2*n1+k] * ma->z2[k] * expl(lgamma(2*n1 + k + 1) - lgamma(k + 1) + y); + pc[1] = ma->z1[2*n1] * x / sum; + for (k = 1, x = 0.; k <= 2 * n2; ++k) + x += ma->phi[k] * ma->z2[k] * expl(lgamma(ma->M - k + 1) - lgamma(2*n2 - k + 1) + y); + pc[1] += ma->z1[0] * x / sum; + // the variant is specific to group1 + y = lgamma(2*n1 + 1) - lgamma(ma->M + 1); + for (k = 0, x = 0.; k < 2 * n1; ++k) + x += ma->phi[2*n2+k] * ma->z1[k] * expl(lgamma(2*n2 + k + 1) - lgamma(k + 1) + y); + pc[0] = ma->z2[2*n2] * x / sum; for (k = 1, x = 0.; k <= 2 * n1; ++k) - x += ma->phi[k] * ma->z1[k] * exp(lgamma(ma->M - k + 1) - lgamma(2*n1 - k + 1) + y); - pc[1] = ma->z2[0] * x / sum; - for (k = 0; k < 4; ++k) { + x += ma->phi[k] * ma->z1[k] * expl(lgamma(ma->M - k + 1) - lgamma(2*n1 - k + 1) + y); + pc[0] += ma->z2[0] * x / sum; + // rescale + for (k = 2; k < 4; ++k) { y = 1. - pc[k]; if (y <= 0.) y = 1e-100; - pc[k] = (int)(-3.434 * log(y) + .499); + pc[k] = (int)(-4.343 * log(y) + .499); if (pc[k] > 99.) pc[k] = 99.; } } diff --git a/bcftools/vcfutils.pl b/bcftools/vcfutils.pl index 0fe1af8..ee9e998 100755 --- a/bcftools/vcfutils.pl +++ b/bcftools/vcfutils.pl @@ -116,6 +116,7 @@ Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions # my @t = split; next if (length($t[3]) != 1 || uc($t[3]) eq 'N'); my @s = split(',', $t[4]); + $t[5] = 3 if ($t[5] < 0); $t[3] = uc($t[3]); $t[4] = uc($t[4]); next if (length($s[0]) != 1); push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $h{$t[0],$t[1]}? 1 : 0]); @@ -131,7 +132,7 @@ Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions # my @x; $x[0] = sprintf("%.3f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100); $x[1] = sprintf("%.3f", $hsize? $c[3] / $hsize : 0); - $x[2] = sprintf("%.3f", $c[3] / $c[0]); + $x[2] = sprintf("%.3f", $c[3] / $c[1]); print join("\t", $last, @c, @x), "\n"; $next = $c[0]/@a + $opts{s}; } -- 2.39.2