+sub gapstats {
+ my (@c0, @c1);
+ $c0[$_] = $c1[$_] = 0 for (0 .. 10000);
+ while (<>) {
+ next if (/^#/);
+ my @t = split;
+ next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel
+ my @s = split(',', $t[4]);
+ for my $x (@s) {
+ my $l = length($x) - length($t[3]) + 5000;
+ if ($x =~ /^-/) {
+ $l = -(length($x) - 1) + 5000;
+ } elsif ($x =~ /^\+/) {
+ $l = length($x) - 1 + 5000;
+ }
+ $c0[$l] += 1 / @s;
+ }
+ }
+ for (my $i = 0; $i < 10000; ++$i) {
+ next if ($c0[$i] == 0);
+ $c1[0] += $c0[$i];
+ $c1[1] += $c0[$i] if (($i-5000)%3 == 0);
+ printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]);
+ }
+ printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]);
+}
+