#define MINUS_CONST 0x10000000
#define INDEL_WINDOW_SIZE 50
#define MAX_SCORE 90
+#define MIN_SUPPORT_COEF 500
void *bcf_call_add_rg(void *_hash, const char *hdtext, const char *list)
{
if (s == n) return -1; // there is no indel at this position.
for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads
{ // find out how many types of indels are present
- int m;
+ int m, n_alt = 0, n_tot = 0;
uint32_t *aux;
aux = calloc(N + 1, 4);
m = max_rd_len = 0;
for (s = 0; s < n; ++s) {
for (i = 0; i < n_plp[s]; ++i) {
const bam_pileup1_t *p = plp[s] + i;
- if (p->indel != 0 && (rghash == 0 || p->aux == 0))
- aux[m++] = MINUS_CONST + p->indel;
+ if (rghash == 0 || p->aux == 0) {
+ ++n_tot;
+ if (p->indel != 0) {
+ ++n_alt;
+ aux[m++] = MINUS_CONST + p->indel;
+ }
+ }
j = bam_cigar2qlen(&p->b->core, bam1_cigar(p->b));
if (j > max_rd_len) max_rd_len = j;
}
// squeeze out identical types
for (i = 1, n_types = 1; i < m; ++i)
if (aux[i] != aux[i-1]) ++n_types;
- if (n_types == 1) { // no indels
+ if (n_types == 1 || n_alt * MIN_SUPPORT_COEF < n_tot) { // no indels or too few supporting reads
free(aux); return -1;
}
types = (int*)calloc(n_types, sizeof(int));
&usage if (@ARGV < 1);
my $command = shift(@ARGV);
my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter,
- hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats);
+ hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats,
+ gapstats=>\&gapstats);
die("Unknown command \"$command\".\n") if (!defined($func{$command}));
&{$func{$command}};
}
}
}
+sub gapstats {
+ my (@c0, @c1);
+ $c0[$_] = $c1[$_] = 0 for (0 .. 10000);
+ while (<>) {
+ next if (/^#/);
+ my @t = split;
+ next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel
+ my @s = split(',', $t[4]);
+ for my $x (@s) {
+ my $l = length($x) - length($t[3]) + 5000;
+ $c0[$l] += 1 / @s;
+ }
+ }
+ for (my $i = 0; $i < 10000; ++$i) {
+ next if ($c0[$i] == 0);
+ printf("%d\t%.2f\n", ($i-5000), $c0[$i]);
+ }
+}
+
sub ucscsnp2vcf {
die("Usage: vcfutils.pl <in.ucsc.snp>\n") if (@ARGV == 0 && -t STDIN);
print "##fileformat=VCFv4.0\n";