From 82e06ed4289c179c7a0da9b8ffa31b15836ae763 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sun, 3 Oct 2010 01:46:25 +0000 Subject: [PATCH] * vcfutils.pl qstats: calculate marginal ts/tv * allow to call genotypes at variant sites --- bcftools/bcf.c | 2 +- bcftools/call1.c | 17 ++++++++++++++++- bcftools/vcfutils.pl | 8 +++++++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/bcftools/bcf.c b/bcftools/bcf.c index feeda57..35c3d94 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -190,7 +190,7 @@ int bcf_destroy(bcf1_t *b) int i; if (b == 0) return -1; free(b->str); - for (i = 0; i < b->n_gi; ++i) + for (i = 0; i < b->m_gi; ++i) free(b->gi[i].data); free(b->gi); free(b); diff --git a/bcftools/call1.c b/bcftools/call1.c index 5c6e477..a5c03f4 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -24,6 +24,7 @@ KSTREAM_INIT(gzFile, gzread, 16384) #define VC_KEEPALT 256 #define VC_ACGT_ONLY 512 #define VC_QCALL 1024 +#define VC_CALL_GT 2048 typedef struct { int flag, prior_type, n1; @@ -178,6 +179,18 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p if (!is_var) bcf_shrink_alt(b, 1); else if (!(flag&VC_KEEPALT)) bcf_shrink_alt(b, pr->rank0 < 2? 2 : pr->rank0+1); + if (is_var && (flag&VC_CALL_GT)) { // call individual genotype + int i, x, old_n_gi = b->n_gi; + s.m = b->m_str; s.l = b->l_str - 1; s.s = b->str; + kputs(":GT:GQ", &s); kputc('\0', &s); + b->m_str = s.m; b->l_str = s.l; b->str = s.s; + bcf_sync(b); + for (i = 0; i < b->n_smpl; ++i) { + x = bcf_p1_call_gt(pa, pr->f_em, i); + ((uint8_t*)b->gi[old_n_gi].data)[i] = (x&3) == 0? 1<<3|1 : (x&3) == 1? 1 : 0; + ((uint8_t*)b->gi[old_n_gi+1].data)[i] = x>>2; + } + } return is_var; } @@ -198,7 +211,7 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; - while ((c = getopt(argc, argv, "N1:l:cHAGvLbSuP:t:p:Q")) >= 0) { + while ((c = getopt(argc, argv, "N1:l:cHAGvLbSuP:t:p:Qg")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; case 'l': vc.fn_list = strdup(optarg); break; @@ -212,6 +225,7 @@ int bcfview(int argc, char *argv[]) case 'v': vc.flag |= VC_VARONLY; break; case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break; case 'H': vc.flag |= VC_HWE; break; + case 'g': vc.flag |= VC_CALL_GT; break; case 't': vc.theta = atof(optarg); break; case 'p': vc.pref = atof(optarg); break; case 'Q': vc.flag |= VC_QCALL; break; @@ -232,6 +246,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -S input is VCF\n"); fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n"); fprintf(stderr, " -G suppress all individual genotype information\n"); + fprintf(stderr, " -g call genotypes for variant sites\n"); fprintf(stderr, " -L discard the PL genotype field\n"); fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n"); fprintf(stderr, " -v output potential variant sites only\n"); diff --git a/bcftools/vcfutils.pl b/bcftools/vcfutils.pl index 6fb1952..371e8d8 100755 --- a/bcftools/vcfutils.pl +++ b/bcftools/vcfutils.pl @@ -95,7 +95,7 @@ sub fillac { } sub qstats { - my %opts = (r=>'', s=>0.01, v=>undef); + my %opts = (r=>'', s=>0.02, v=>undef); getopts('r:s:v', \%opts); die("Usage: vcfutils.pl qstats [-r ref.vcf] \n Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions #joint ts/tv #joint/#ref #joint/#non-indel \n") if (@ARGV == 0 && -t STDIN); @@ -147,14 +147,20 @@ Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions # my $next = $opts{s}; my $last = $a[0]; my @c = (0, 0, 0, 0); + my @lc; + $lc[1] = $lc[2] = 0; for my $p (@a) { if ($p->[0] == -1 || ($p->[0] != $last && $c[0]/@a > $next)) { my @x; $x[0] = sprintf("%.4f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100); $x[1] = sprintf("%.4f", $hsize? $c[3] / $hsize : 0); $x[2] = sprintf("%.4f", $c[3] / $c[1]); + my $a = $c[1] - $lc[1]; + my $b = $c[2] - $lc[2]; + $x[3] = sprintf("%.4f", $a-$b? $b / ($a-$b) : 100); print join("\t", $last, @c, @x), "\n"; $next = $c[0]/@a + $opts{s}; + $lc[1] = $c[1]; $lc[2] = $c[2]; } ++$c[0]; $c[1] += $p->[1]; $c[2] += $p->[2]; $c[3] += $p->[3]; $last = $p->[0]; -- 2.39.2