print;
} else {
my @t = split;
- my @c = (0);
+ my @c = (0, 0);
my $n = 0;
my $s = -1;
@_ = split(":", $t[8]);
}
sub varFilter {
- my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>10, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000);
- getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:', \%opts);
+ my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4);
+ getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts);
die(qq/
Usage: vcfutils.pl varFilter [options] <in.vcf>
-2 FLOAT min P-value for baseQ bias [$opts{2}]
-3 FLOAT min P-value for mapQ bias [$opts{3}]
-4 FLOAT min P-value for end distance bias [$opts{4}]
+ -e FLOAT min P-value for HWE (plus F<0) [$opts{e}]
-p print filtered variants
Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools.
$flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/
&& ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4}));
$flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S})));
+ # HWE filter
+ if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) {
+ my $p = 2*$1 + $2;
+ my $f = ($p > 0 && $p < 1)? 1 - $2 / ($p * (1-$p)) : 0;
+ $flt = 9 if ($f < 0);
+ }
my $score = $t[5] * 100 + $dp_alt;
my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs
my ($b, $q);
$q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/);
if ($q < 0) {
- $_ = $1 if ($t[7] =~ /AF1=([\d\.]+)/);
+ $_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0;
$b = ($_ < .5 || $alt eq '.')? $ref : $alt;
$q = -$q;
} else {