X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fcall1.c;h=c5d466e12b164d3df39b093b1cd07377207e2285;hb=955605d6cf50290e32ff923da397021361ceb596;hp=e8ecb51a3e0b73d0c4020133f8caedba43938298;hpb=390bbfbca2df79acf521706409a258895dfe72b1;p=samtools.git diff --git a/bcftools/call1.c b/bcftools/call1.c index e8ecb51..c5d466e 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -21,6 +21,7 @@ KSTREAM_INIT(gzFile, gzread, 16384) #define VC_VCFIN 32 #define VC_UNCOMP 64 #define VC_HWE 128 +#define VC_KEEPALT 256 typedef struct { int flag, prior_type, n1; @@ -141,7 +142,7 @@ static void rm_info(int n_smpl, bcf1_t *b, const char *key) bcf_sync(n_smpl, b); } -static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref) +static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag) { kstring_t s; int is_var = (pr->p_ref < pref); @@ -154,16 +155,13 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); - if (is_var) { - kputs(b->alt, &s); - } - kputc('\0', &s); kputc('\0', &s); + kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s); kputs(b->info, &s); if (b->info[0]) kputc(';', &s); ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp); ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq); if (a.is_tested) { - if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.2lg,%.2lg,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]); + if (pr->pc[0] >= 0.) ksprintf(&s, ";PC4=%.4lf,%.4lf,%.2lg,%.2lg", pr->pc[0], pr->pc[1], pr->pc[2], pr->pc[3]); ksprintf(&s, ";PV4=%.2lg,%.2lg,%.2lg,%.2lg", a.p[0], a.p[1], a.p[2], a.p[3]); } if (pr->g[0] >= 0. && p_hwe <= .2) @@ -172,9 +170,12 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; - b->qual = r < 1e-100? 99 : -3.434 * log(r); + b->qual = r < 1e-100? 99 : -4.343 * log(r); if (b->qual > 99) b->qual = 99; bcf_sync(n_smpl, b); + if (!is_var) bcf_shrink_alt(n_smpl, b, 1); + else if (!(flag&VC_KEEPALT)) + bcf_shrink_alt(n_smpl, b, pr->rank0 < 2? 2 : pr->rank0+1); return is_var; } @@ -193,13 +194,14 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); - vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.9; - while ((c = getopt(argc, argv, "1:l:cHGvLbSuP:t:p:")) >= 0) { + vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; + while ((c = getopt(argc, argv, "1:l:cHAGvLbSuP:t:p:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; case 'l': vc.fn_list = strdup(optarg); break; case 'G': vc.flag |= VC_NO_GENO; break; case 'L': vc.flag |= VC_NO_PL; break; + case 'A': vc.flag |= VC_KEEPALT; break; case 'b': vc.flag |= VC_BCFOUT; break; case 'S': vc.flag |= VC_VCFIN; break; case 'c': vc.flag |= VC_CALL; break; @@ -223,6 +225,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -b output BCF instead of VCF\n"); fprintf(stderr, " -u uncompressed BCF output\n"); fprintf(stderr, " -S input is VCF\n"); + fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n"); fprintf(stderr, " -G suppress all individual genotype information\n"); fprintf(stderr, " -L discard the PL genotype field\n"); fprintf(stderr, " -H perform Hardy-Weinberg test (slower)\n"); @@ -282,20 +285,24 @@ int bcfview(int argc, char *argv[]) if (b->tid != tid || b->pos >= end) break; if (!(l > begin && end > b->pos)) continue; } + ++n_processed; if (vc.flag & VC_CALL) { bcf_p1rst_t pr; + bcf_gl2pl(h->n_smpl, b); bcf_p1_cal(b, p1, &pr); // pr.g[3] is not calculated here if (vc.flag&VC_HWE) bcf_p1_cal_g3(p1, pr.g); - if ((n_processed + 1) % 50000 == 0) bcf_p1_dump_afs(p1); + if (n_processed % 100000 == 0) { + fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); + bcf_p1_dump_afs(p1); + } if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue; - update_bcf1(h->n_smpl, b, p1, &pr, vc.pref); + update_bcf1(h->n_smpl, b, p1, &pr, vc.pref, vc.flag); } if (vc.flag & VC_NO_GENO) { b->n_gi = 0; b->fmt[0] = '\0'; } vcf_write(bout, h, b); - ++n_processed; } if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);