* keep the raw depth because in indel calling, DP4 may be way off the true depth
* negative if we are looking at an indel. */
int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r)
{
- int i, n, ref4, is_indel;
+ int i, n, ref4, is_indel, ori_depth = 0;
memset(r, 0, sizeof(bcf_callret1_t));
if (ref_base >= 0) {
ref4 = bam_nt16_nt4_table[ref_base];
int q, b, mapQ, baseQ, is_diff, min_dist, seqQ;
// set base
if (p->is_del || p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue;
+ ++ori_depth;
baseQ = q = is_indel? p->aux&0xff : (int)bam1_qual(p->b)[p->qpos]; // base/indel quality
seqQ = is_indel? (p->aux>>8&0xff) : 99;
if (q < bca->min_baseQ) continue;
r->anno[3<<2|is_diff<<1|0] += min_dist;
r->anno[3<<2|is_diff<<1|1] += min_dist * min_dist;
}
- r->depth = n;
+ r->depth = n; r->ori_depth = ori_depth;
// glfgen
errmod_cal(bca->e, n, 5, bca->bases, r->p);
return r->depth;
}
// combine annotations
memset(call->anno, 0, 16 * sizeof(int));
- for (i = call->depth = 0, tmp = 0; i < n; ++i) {
+ for (i = call->depth = call->ori_depth = 0, tmp = 0; i < n; ++i) {
call->depth += calls[i].depth;
+ call->ori_depth += calls[i].ori_depth;
for (j = 0; j < 16; ++j) call->anno[j] += calls[i].anno[j];
}
return 0;
kputc('\0', &s);
// INFO
if (bc->ori_ref < 0) kputs("INDEL;", &s);
- kputs("I16=", &s);
+ kputs("DP=", &s); kputw(bc->ori_depth, &s); kputs(";I16=", &s);
for (i = 0; i < 16; ++i) {
if (i) kputc(',', &s);
kputw(bc->anno[i], &s);
} bcf_callaux_t;
typedef struct {
- int depth, qsum[4];
+ int depth, ori_depth, qsum[4]; // depth: n as stored by bcf_call_glfgen(); ori_depth: pileup entries counted before the min_baseQ filter (deletions, ref-skips and unmapped reads are not counted)
int anno[16];
float p[25];
} bcf_callret1_t;
typedef struct {
int a[5]; // alleles: ref, alt, alt2, alt3
int n, n_alleles, shift, ori_ref, unseen;
- int anno[16], depth;
+ int anno[16], depth, ori_depth; // NOTE(review): ori_depth is presumably the sum of the per-sample bcf_callret1_t.ori_depth values and the number written as INFO "DP=" -- confirm against the code that combines calls and formats the INFO field
uint8_t *PL;
} bcf_call_t;
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.9-15 (r821)"
+#define PACKAGE_VERSION "0.1.9-16 (r822)"
#endif
int bam_taf2baf(int argc, char *argv[]);
my $flt = 0;
# parse annotations
my ($dp, $mq, $dp_alt) = (-1, -1, -1);
- if ($t[7] =~ /DP=(\d+)/i) {
- $dp = $1;
- } elsif ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
+ if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
$dp = $1 + $2 + $3 + $4;
$dp_alt = $3 + $4;
}
+ if ($t[7] =~ /DP=(\d+)/i) {
+ $dp = $1;
+ }
$mq = $1 if ($t[7] =~ /MQ=(\d+)/i);
# the depth and mapQ filter
if ($dp >= 0) {
my @s = split(',', $t[4]);
for my $x (@s) {
my $l = length($x) - length($t[3]) + 5000;
+ if ($x =~ /^-/) {
+ $l = -(length($x) - 1) + 5000;
+ } elsif ($x =~ /^\+/) {
+ $l = length($x) - 1 + 5000;
+ }
$c0[$l] += 1 / @s;
}
}
for (my $i = 0; $i < 10000; ++$i) {
next if ($c0[$i] == 0);
- printf("%d\t%.2f\n", ($i-5000), $c0[$i]);
+ $c1[0] += $c0[$i];
+ $c1[1] += $c0[$i] if (($i-5000)%3 == 0);
+ printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]);
}
+ printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]);
}
sub ucscsnp2vcf {
&usage if (@ARGV < 1);
my $command = shift(@ARGV);
-my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter,
+my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter, plp2vcf=>\&plp2vcf,
unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr, sam2fq=>\&sam2fq);
die("Unknown command \"$command\".\n") if (!defined($func{$command}));
close($fh);
}
+sub plp2vcf { # Read whitespace-separated lines from <> (presumably samtools pileup consensus output -- confirm) and print one tab-separated, VCF-like 8-column record per indel line.
+ while (<>) {
+ my @t = split; # fields of the current line, split on whitespace
+ next if ($t[3] eq '*/*'); # both alleles are '*': nothing to report
+ if ($t[2] eq '*') { # indel line: third field is '*'
+ my @s = split("/", $t[3]); # the two allele strings, e.g. "-AC" or "+T" or "*"
+ my (@a, @b); # parallel lists: @a = REF string per allele, @b = ALT string per allele
+ my ($ref, $alt);
+ for (@s) {
+ next if ($_ eq '*'); # '*' allele contributes nothing
+ if (/^-/) { # deletion: REF is anchor 'N' plus the deleted sequence, ALT is the anchor alone
+ push(@a, 'N'.substr($_, 1));
+ push(@b, 'N');
+ } elsif (/^\+/) { # insertion: REF is the anchor alone, ALT is anchor 'N' plus the inserted sequence
+ push(@a, 'N');
+ push(@b, 'N'.substr($_, 1));
+ } # NOTE(review): an allele that is neither '*', '-...' nor '+...' is silently dropped, which can leave $ref/$alt undefined below
+ }
+ if ($a[0] && $a[1]) { # two indel alleles: use the longer REF and pad the other allele's ALT with 'N' to the same span
+ if (length($a[0]) < length($a[1])) {
+ $ref = $a[1];
+ $alt = ($b[0] . ('N' x (length($a[1]) - length($a[0])))) . ",$b[1]";
+ } elsif (length($a[0]) > length($a[1])) {
+ $ref = $a[0];
+ $alt = ($b[1] . ('N' x (length($a[0]) - length($a[1])))) . ",$b[0]"; # note: ALT order is second allele first in this branch
+ } else { # equal REF lengths: no padding needed; identical ALTs collapse to a single entry
+ $ref = $a[0];
+ $alt = ($b[0] eq $b[1])? $b[0] : "$b[0],$b[1]";
+ }
+ } else { # only one indel allele was collected (the other was '*')
+ $ref = $a[0]; $alt = $b[0];
+ }
+ print join("\t", @t[0,1], '.', $ref, $alt, $t[5], '.', '.'), "\n"; # columns: first two input fields, '.', REF, ALT, sixth input field, '.', '.'
+ } else { # SNP: branch is empty, so non-indel lines produce no output
+ }
+ }
+}
+
#
# Usage
#