&usage if (@ARGV < 1);
my $command = shift(@ARGV);
my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter,
- hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats);
+ hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats,
+ gapstats=>\&gapstats);
die("Unknown command \"$command\".\n") if (!defined($func{$command}));
&{$func{$command}};
}
my $flt = 0;
# parse annotations
my ($dp, $mq, $dp_alt) = (-1, -1, -1);
- if ($t[7] =~ /DP=(\d+)/i) {
- $dp = $1;
- } elsif ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
+ if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
$dp = $1 + $2 + $3 + $4;
$dp_alt = $3 + $4;
}
+ if ($t[7] =~ /DP=(\d+)/i) {
+ $dp = $1;
+ }
$mq = $1 if ($t[7] =~ /MQ=(\d+)/i);
# the depth and mapQ filter
if ($dp >= 0) {
}
}
+sub gapstats {
+ my (@c0, @c1);
+ $c0[$_] = $c1[$_] = 0 for (0 .. 10000);
+ while (<>) {
+ next if (/^#/);
+ my @t = split;
+ next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel
+ my @s = split(',', $t[4]);
+ for my $x (@s) {
+ my $l = length($x) - length($t[3]) + 5000;
+ if ($x =~ /^-/) {
+ $l = -(length($x) - 1) + 5000;
+ } elsif ($x =~ /^\+/) {
+ $l = length($x) - 1 + 5000;
+ }
+ $c0[$l] += 1 / @s;
+ }
+ }
+ for (my $i = 0; $i < 10000; ++$i) {
+ next if ($c0[$i] == 0);
+ $c1[0] += $c0[$i];
+ $c1[1] += $c0[$i] if (($i-5000)%3 == 0);
+ printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]);
+ }
+ printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]);
+}
+
sub ucscsnp2vcf {
die("Usage: vcfutils.pl <in.ucsc.snp>\n") if (@ARGV == 0 && -t STDIN);
print "##fileformat=VCFv4.0\n";