X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fvcfutils.pl;h=2b7ba0b1d00932be1f79f08e4b3bd8e8db951295;hb=dbc7644895129440a9b1d7b448d5d8046b7af5d4;hp=5df8b0c05e1275e7edc4b8063efa5c585ee7a80a;hpb=5f5ed2477797cbd8b0c3cdd16d8abbaa2319d300;p=samtools.git diff --git a/bcftools/vcfutils.pl b/bcftools/vcfutils.pl index 5df8b0c..2b7ba0b 100755 --- a/bcftools/vcfutils.pl +++ b/bcftools/vcfutils.pl @@ -86,7 +86,7 @@ sub fillac { print; } else { my @t = split; - my @c = (0); + my @c = (0, 0); my $n = 0; my $s = -1; @_ = split(":", $t[8]); @@ -215,8 +215,8 @@ Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions # } sub varFilter { - my %opts = (d=>2, D=>10000, a=>2, W=>10, Q=>10, w=>10, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000); - getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:', \%opts); + my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4); + getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts); die(qq/ Usage: vcfutils.pl varFilter [options] @@ -230,6 +230,7 @@ Options: -Q INT minimum RMS mapping quality for SNPs [$opts{Q}] -2 FLOAT min P-value for baseQ bias [$opts{2}] -3 FLOAT min P-value for mapQ bias [$opts{3}] -4 FLOAT min P-value for end distance bias [$opts{4}] + -e FLOAT min P-value for HWE (plus F<0) [$opts{e}] -p print filtered variants Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. @@ -246,6 +247,7 @@ Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. print; next; } next if ($t[4] eq '.'); # skip non-var sites + next if ($t[3] eq 'N'); # skip sites with unknown ref ('N') # check if the site is a SNP my $type = 1; # SNP if (length($t[3]) > 1) { @@ -289,7 +291,13 @@ Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. $flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q}); $flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/ && ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4})); - $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 >= $opts{G}) || (/MXSP=(\d+)/ && $1 < $opts{S}))); + $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S}))); + # HWE filter + if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) { + my $p = 2*$1 + $2; + my $f = ($p > 0 && $p < 1)? 1 - $2 / ($p * (1-$p)) : 0; + $flt = 9 if ($f < 0); + } my $score = $t[5] * 100 + $dp_alt; my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs @@ -312,7 +320,10 @@ Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. } else { # SNP or MNP for my $x (@staging) { next if (($x->[0]&3) != 3 || $x->[4] + $x->[2] + $ow < $t[1]); - $flt = 5; + if ($x->[4] + length($x->[7]) - 1 == $t[1] && substr($x->[7], -1, 1) eq substr($t[4], 0, 1) + && length($x->[7]) - length($x->[6]) == 1) { + $x->[1] = 5; + } else { $flt = 5; } last; } # check MNP @@ -495,7 +506,7 @@ Options: -d INT minimum depth [$opts{d}] my ($b, $q); $q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/); if ($q < 0) { - $_ = $1 if ($t[7] =~ /AF1=([\d\.]+)/); + $_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0; $b = ($_ < .5 || $alt eq '.')? $ref : $alt; $q = -$q; } else { @@ -508,7 +519,7 @@ Options: -d INT minimum depth [$opts{d}] $q = chr($q <= 126? $q : 126); $seq .= $b; $qual .= $q; - } else { # an INDEL + } elsif ($t[4] ne '.') { # an INDEL push(@gaps, [$t[1], length($t[3])]); } $last_pos = $t[1];