#define VC_VCFIN 32
#define VC_UNCOMP 64
#define VC_HWE 128
+#define VC_KEEPALT 256
typedef struct {
int flag, prior_type, n1;
bcf_sync(n_smpl, b);
}
-static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref)
+static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag)
{
kstring_t s;
int is_var = (pr->p_ref < pref);
memset(&s, 0, sizeof(kstring_t));
kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s);
- if (is_var) {
- kputs(b->alt, &s);
- }
- kputc('\0', &s); kputc('\0', &s);
+ kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s);
kputs(b->info, &s);
if (b->info[0]) kputc(';', &s);
ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp);
ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
if (a.is_tested) {
- if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.2lg,%.2lg,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
+ if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.4lf,%.4lf,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]);
ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]);
}
if (pr->g[0] >= 0. && p_hwe <= .2)
kputs(b->fmt, &s); kputc('\0', &s);
free(b->str);
b->m_str = s.m; b->l_str = s.l; b->str = s.s;
- b->qual = r < 1e-100? 99 : -3.434 * log(r);
+ b->qual = r < 1e-100? 99 : -4.343 * log(r);
if (b->qual > 99) b->qual = 99;
bcf_sync(n_smpl, b);
+ if (!is_var) bcf_shrink_alt(n_smpl, b, 1);
+ else if (!(flag&VC_KEEPALT))
+ bcf_shrink_alt(n_smpl, b, pr->rank0 < 2? 2 : pr->rank0+1);
return is_var;
}
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
- vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.9;
- while ((c = getopt(argc, argv, "1:l:cHGvLbSuP:t:p:")) >= 0) {
+ vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5;
+ while ((c = getopt(argc, argv, "1:l:cHAGvLbSuP:t:p:")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.fn_list = strdup(optarg); break;
case 'G': vc.flag |= VC_NO_GENO; break;
case 'L': vc.flag |= VC_NO_PL; break;
+ case 'A': vc.flag |= VC_KEEPALT; break;
case 'b': vc.flag |= VC_BCFOUT; break;
case 'S': vc.flag |= VC_VCFIN; break;
case 'c': vc.flag |= VC_CALL; break;
fprintf(stderr, " -b output BCF instead of VCF\n");
fprintf(stderr, " -u uncompressed BCF output\n");
fprintf(stderr, " -S input is VCF\n");
+ fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n");
fprintf(stderr, " -G suppress all individual genotype information\n");
fprintf(stderr, " -L discard the PL genotype field\n");
fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n");
if (b->tid != tid || b->pos >= end) break;
if (!(l > begin && end > b->pos)) continue;
}
+ ++n_processed;
if (vc.flag & VC_CALL) {
bcf_p1rst_t pr;
+ bcf_gl2pl(h->n_smpl, b);
bcf_p1_cal(b, p1, &pr); // pr.g[3] is not calculated here
if (vc.flag&VC_HWE) bcf_p1_cal_g3(p1, pr.g);
- if ((n_processed + 1) % 50000 == 0) bcf_p1_dump_afs(p1);
+ if (n_processed % 100000 == 0) {
+ fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed);
+ bcf_p1_dump_afs(p1);
+ }
if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue;
- update_bcf1(h->n_smpl, b, p1, &pr, vc.pref);
+ update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag);
}
if (vc.flag & VC_NO_GENO) {
b->n_gi = 0;
b->fmt[0] = '\0';
}
vcf_write(bout, h, b);
- ++n_processed;
}
if (vc.prior_file) free(vc.prior_file);
if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);