From 95b7e2d5d54f2b10bef8dd937e43c342d2086b46 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 15 Nov 2010 05:30:18 +0000 Subject: [PATCH] * samtools-0.1.9-16 (r822) * keep the raw depth because in indel calling, DP4 may be way off the true depth --- bam2bcf.c | 10 ++++++---- bam2bcf.h | 4 ++-- bamtk.c | 2 +- bcftools/vcfutils.pl | 17 +++++++++++++---- misc/samtools.pl | 40 +++++++++++++++++++++++++++++++++++++++- 5 files changed, 61 insertions(+), 12 deletions(-) diff --git a/bam2bcf.c b/bam2bcf.c index 8537e23..088635c 100644 --- a/bam2bcf.c +++ b/bam2bcf.c @@ -36,7 +36,7 @@ void bcf_call_destroy(bcf_callaux_t *bca) * negative if we are looking at an indel. */ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r) { - int i, n, ref4, is_indel; + int i, n, ref4, is_indel, ori_depth = 0; memset(r, 0, sizeof(bcf_callret1_t)); if (ref_base >= 0) { ref4 = bam_nt16_nt4_table[ref_base]; @@ -56,6 +56,7 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t int q, b, mapQ, baseQ, is_diff, min_dist, seqQ; // set base if (p->is_del || p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue; + ++ori_depth; baseQ = q = is_indel? p->aux&0xff : (int)bam1_qual(p->b)[p->qpos]; // base/indel quality seqQ = is_indel? (p->aux>>8&0xff) : 99; if (q < bca->min_baseQ) continue; @@ -86,7 +87,7 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t r->anno[3<<2|is_diff<<1|0] += min_dist; r->anno[3<<2|is_diff<<1|1] += min_dist * min_dist; } - r->depth = n; + r->depth = n; r->ori_depth = ori_depth; // glfgen errmod_cal(bca->e, n, 5, bca->bases, r->p); return r->depth; @@ -160,8 +161,9 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, } // combine annotations memset(call->anno, 0, 16 * sizeof(int)); - for (i = call->depth = 0, tmp = 0; i < n; ++i) { + for (i = call->depth = call->ori_depth = 0, tmp = 0; i < n; ++i) { call->depth += calls[i].depth; + call->ori_depth += calls[i].ori_depth; for (j = 0; j < 16; ++j) call->anno[j] += calls[i].anno[j]; } return 0; @@ -212,7 +214,7 @@ int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bc kputc('\0', &s); // INFO if (bc->ori_ref < 0) kputs("INDEL;", &s); - kputs("I16=", &s); + kputs("DP=", &s); kputw(bc->ori_depth, &s); kputs(";I16=", &s); for (i = 0; i < 16; ++i) { if (i) kputc(',', &s); kputw(bc->anno[i], &s); diff --git a/bam2bcf.h b/bam2bcf.h index e0673e5..26b022c 100644 --- a/bam2bcf.h +++ b/bam2bcf.h @@ -21,7 +21,7 @@ typedef struct __bcf_callaux_t { } bcf_callaux_t; typedef struct { - int depth, qsum[4]; + int depth, ori_depth, qsum[4]; int anno[16]; float p[25]; } bcf_callret1_t; @@ -29,7 +29,7 @@ typedef struct { typedef struct { int a[5]; // alleles: ref, alt, alt2, alt3 int n, n_alleles, shift, ori_ref, unseen; - int anno[16], depth; + int anno[16], depth, ori_depth; uint8_t *PL; } bcf_call_t; diff --git a/bamtk.c b/bamtk.c index 23711e3..cb7b8a0 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.9-15 (r821)" +#define PACKAGE_VERSION "0.1.9-16 (r822)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bcftools/vcfutils.pl b/bcftools/vcfutils.pl index 1270c6c..bbc479b 100755 --- a/bcftools/vcfutils.pl +++ b/bcftools/vcfutils.pl @@ -249,12 +249,13 @@ Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. my $flt = 0; # parse annotations my ($dp, $mq, $dp_alt) = (-1, -1, -1); - if ($t[7] =~ /DP=(\d+)/i) { - $dp = $1; - } elsif ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) { + if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) { $dp = $1 + $2 + $3 + $4; $dp_alt = $3 + $4; } + if ($t[7] =~ /DP=(\d+)/i) { + $dp = $1; + } $mq = $1 if ($t[7] =~ /MQ=(\d+)/i); # the depth and mapQ filter if ($dp >= 0) { @@ -324,13 +325,21 @@ sub gapstats { my @s = split(',', $t[4]); for my $x (@s) { my $l = length($x) - length($t[3]) + 5000; + if ($x =~ /^-/) { + $l = -(length($x) - 1) + 5000; + } elsif ($x =~ /^\+/) { + $l = length($x) - 1 + 5000; + } $c0[$l] += 1 / @s; } } for (my $i = 0; $i < 10000; ++$i) { next if ($c0[$i] == 0); - printf("%d\t%.2f\n", ($i-5000), $c0[$i]); + $c1[0] += $c0[$i]; + $c1[1] += $c0[$i] if (($i-5000)%3 == 0); + printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]); } + printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]); } sub ucscsnp2vcf { diff --git a/misc/samtools.pl b/misc/samtools.pl index ce8449d..d03c1c7 100755 --- a/misc/samtools.pl +++ b/misc/samtools.pl @@ -10,7 +10,7 @@ my $version = '0.3.3'; &usage if (@ARGV < 1); my $command = shift(@ARGV); -my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter, +my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter, plp2vcf=>\&plp2vcf, unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr, sam2fq=>\&sam2fq); die("Unknown command \"$command\".\n") if (!defined($func{$command})); @@ -473,6 +473,44 @@ sub uniqcmp_aux { close($fh); } +sub plp2vcf { + while (<>) { + my @t = split; + next if ($t[3] eq '*/*'); + if ($t[2] eq '*') { # indel + my @s = split("/", $t[3]); + my (@a, @b); + my ($ref, $alt); + for (@s) { + next if ($_ eq '*'); + if (/^-/) { + push(@a, 'N'.substr($_, 1)); + push(@b, 'N'); + } elsif (/^\+/) { + push(@a, 'N'); + push(@b, 'N'.substr($_, 1)); + } + } + if ($a[0] && $a[1]) { + if (length($a[0]) < length($a[1])) { + $ref = $a[1]; + $alt = ($b[0] . ('N' x (length($a[1]) - length($a[0])))) . ",$b[1]"; + } elsif (length($a[0]) > length($a[1])) { + $ref = $a[0]; + $alt = ($b[1] . ('N' x (length($a[0]) - length($a[1])))) . ",$b[0]"; + } else { + $ref = $a[0]; + $alt = ($b[0] eq $b[1])? $b[0] : "$b[0],$b[1]"; + } + } else { + $ref = $a[0]; $alt = $b[0]; + } + print join("\t", @t[0,1], '.', $ref, $alt, $t[5], '.', '.'), "\n"; + } else { # SNP + } + } +} + # # Usage # -- 2.39.2