#include <string.h>
+#include <math.h>
#include "bcf.h"
#include "kstring.h"
#include "khash.h"
b->fmt[0] = 'G'; b->fmt[1] = 'T'; b->fmt[2] = ':';
return 0;
}
+
+static void *locate_field(const bcf1_t *b, const char *fmt, int l)
+{
+ int i;
+ uint32_t tmp;
+ tmp = bcf_str2int(fmt, l);
+ for (i = 0; i < b->n_gi; ++i)
+ if (b->gi[i].fmt == tmp) break;
+ return i == b->n_gi? 0 : b->gi[i].data;
+}
+
+int bcf_anno_max(bcf1_t *b)
+{
+ int k, max_gq, max_sp, n_het;
+ kstring_t str;
+ uint8_t *gt, *gq, *sp;
+ max_gq = max_sp = n_het = 0;
+ gt = locate_field(b, "GT", 2);
+ if (gt == 0) return -1;
+ gq = locate_field(b, "GQ", 2);
+ sp = locate_field(b, "SP", 2);
+ if (sp)
+ for (k = 0; k < b->n_smpl; ++k)
+ if (gt[k]&0x3f)
+ max_sp = max_sp > (int)sp[k]? max_sp : sp[k];
+ if (gq)
+ for (k = 0; k < b->n_smpl; ++k)
+ if (gt[k]&0x3f)
+ max_gq = max_gq > (int)gq[k]? max_gq : gq[k];
+ for (k = 0; k < b->n_smpl; ++k) {
+ int a1, a2;
+ a1 = gt[k]&7; a2 = gt[k]>>3&7;
+ if ((!a1 && a2) || (!a2 && a1)) { // a het
+ if (gq == 0) ++n_het;
+ else if (gq[k] >= 20) ++n_het;
+ }
+ }
+ if (n_het) max_sp -= (int)(4.343 * log(n_het) + .499);
+ if (max_sp < 0) max_sp = 0;
+ memset(&str, 0, sizeof(kstring_t));
+ if (*b->info) kputc(';', &str);
+ ksprintf(&str, "MXSP=%d;MXGQ=%d", max_sp, max_gq);
+ bcf_append_info(b, str.s, str.l);
+ free(str.s);
+ return 0;
+}
#define VC_CALL_GT 2048
#define VC_ADJLD 4096
#define VC_NO_INDEL 8192
+#define VC_ANNO_MAX 16384
typedef struct {
int flag, prior_type, n1;
extern int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b);
extern void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x);
extern int bcf_fix_gt(bcf1_t *b);
+ extern int bcf_anno_max(bcf1_t *b);
bcf_t *bp, *bout = 0;
bcf1_t *b, *blast;
int c;
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.;
- while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:I")) >= 0) {
+ while ((c = getopt(argc, argv, "N1:l:cHAGvbSuP:t:p:QgLi:IM")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.fn_list = strdup(optarg); break;
case 'H': vc.flag |= VC_HWE; break;
case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break;
case 'I': vc.flag |= VC_NO_INDEL; break;
+ case 'M': vc.flag |= VC_ANNO_MAX; break;
case 't': vc.theta = atof(optarg); break;
case 'p': vc.pref = atof(optarg); break;
case 'i': vc.indel_frac = atof(optarg); break;
}
bcf_cpy(blast, b);
}
+ if (vc.flag & VC_ANNO_MAX) bcf_anno_max(b);
if (vc.flag & VC_NO_GENO) { // do not output GENO fields
b->n_gi = 0;
b->fmt[0] = '\0';
- }
- bcf_fix_gt(b);
+ b->l_str = b->fmt - b->str + 1;
+ } else bcf_fix_gt(b);
vcf_write(bout, h, b);
}
if (vc.prior_file) free(vc.prior_file);
}
sub varFilter {
- my %opts = (d=>2, D=>10000, a=>2, W=>10, Q=>10, w=>10, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4);
- getopts('pd:D:W:Q:w:a:1:2:3:4:', \%opts);
+ my %opts = (d=>2, D=>10000, a=>2, W=>10, Q=>10, w=>10, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000);
+ getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:', \%opts);
die(qq/
Usage: vcfutils.pl varFilter [options] <in.vcf>
$flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q});
$flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/
&& ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4}));
+ $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 >= $opts{G}) || (/MXSP=(\d+)/ && $1 < $opts{S})));
my $score = $t[5] * 100 + $dp_alt;
my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs
if ($first->[1] == 0) {
print join("\t", @$first[3 .. @$first-1]), "\n";
} elsif ($is_print) {
- print STDERR join("\t", substr("UQdDaGgPM", $first->[1], 1), @$first[3 .. @$first-1]), "\n";
+ print STDERR join("\t", substr("UQdDaGgPMS", $first->[1], 1), @$first[3 .. @$first-1]), "\n";
}
}