X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fcall1.c;h=dda4164d111a55747d85f74fd814c90a4bab11d4;hb=3f820f8c565a8162d835a321a5c0ecdd9567d820;hp=6c530085ba591a23aeae98918361cb10745793a6;hpb=93d8193370bf8516f6a60937aa0d6bbecafe8a69;p=samtools.git diff --git a/bcftools/call1.c b/bcftools/call1.c index 6c53008..dda4164 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -40,7 +40,7 @@ typedef struct { uint32_t *trio_aux; char *prior_file, **subsam, *fn_dict; uint8_t *ploidy; - double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt; + double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt, min_ma_lrt; void *bed; } viewconf_t; @@ -48,11 +48,6 @@ void *bed_read(const char *fn); void bed_destroy(void *_h); int bed_overlap(const void *_h, const char *chr, int beg, int end); -typedef struct { - double p[4]; - int mq, depth, is_tested, d[4]; -} anno16_t; - static double ttest(int n1, int n2, int a[4]) { extern double kf_betai(double a, double b, double x); @@ -83,7 +78,7 @@ static int test16_core(int anno[16], anno16_t *a) return 0; } -static int test16(bcf1_t *b, anno16_t *a) +int test16(bcf1_t *b, anno16_t *a) { char *p; int i, anno[16]; @@ -100,17 +95,6 @@ static int test16(bcf1_t *b, anno16_t *a) return test16_core(anno, a); } -static void rm_info(bcf1_t *b, const char *key) -{ - char *p, *q; - if ((p = strstr(b->info, key)) == 0) return; - for (q = p; *q && *q != ';'; ++q); - if (p > b->info && *(p-1) == ';') --p; - memmove(p, q, b->l_str - (q - b->str)); - b->l_str -= q - p; - bcf_sync(b); -} - static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[10], int cons_llr, int64_t cons_gt) { kstring_t s; @@ -119,7 +103,7 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, anno16_t a; has_I16 = test16(b, &a) >= 0? 1 : 0; - rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! + //rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); @@ -170,6 +154,8 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, } if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]); kputc('\0', &s); + rm_info(&s, "QS="); + rm_info(&s, "I16="); kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; @@ -250,6 +236,12 @@ static void write_header(bcf_hdr_t *h) kputs("##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO== 0) { + while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:K:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; - case 'l': vc.bed = bed_read(optarg); break; + case 'l': vc.bed = bed_read(optarg); if (!vc.bed) { fprintf(stderr,"Could not read \"%s\"\n", optarg); return 1; } break; case 'D': vc.fn_dict = strdup(optarg); break; case 'F': vc.flag |= VC_FIX_PL; break; case 'N': vc.flag |= VC_ACGT_ONLY; break; @@ -341,6 +336,7 @@ int bcfview(int argc, char *argv[]) case 'w': vc.flag |= VC_INDEL_ONLY; break; case 'M': vc.flag |= VC_ANNO_MAX; break; case 'Y': vc.flag |= VC_QCNT; break; + case 'm': vc.min_ma_lrt = atof(optarg); break; case 't': vc.theta = atof(optarg); break; case 'p': vc.pref = atof(optarg); break; case 'i': vc.indel_frac = atof(optarg); break; @@ -350,6 +346,7 @@ int bcfview(int argc, char *argv[]) case 'C': vc.min_lrt = atof(optarg); break; case 'X': vc.min_perm_p = atof(optarg); break; case 'd': vc.min_smpl_frac = atof(optarg); break; + case 'K': bcf_p1_fp_lk = gzopen(optarg, "w"); break; case 's': vc.subsam = read_samples(optarg, &vc.n_sub); vc.ploidy = calloc(vc.n_sub + 1, 1); for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1]; @@ -396,6 +393,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -g call genotypes at variant sites (force -c)\n"); fprintf(stderr, " -i FLOAT indel-to-substitution ratio [%.4g]\n", vc.indel_frac); fprintf(stderr, " -I skip indels\n"); + fprintf(stderr, " -m FLOAT alternative model for multiallelic and rare-variant calling, include if P(chi^2)>=FLOAT\n"); fprintf(stderr, " -p FLOAT variant if P(ref|D) 0) { int is_indel, cons_llr = -1; int64_t cons_gt = -1; @@ -520,9 +522,19 @@ int bcfview(int argc, char *argv[]) int i; for (i = 0; i < 9; ++i) em[i] = -1.; } - if (vc.flag & VC_CALL) { // call variants + if ( !(vc.flag&VC_KEEPALT) && (vc.flag&VC_CALL) && vc.min_ma_lrt>=0 ) + { + bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions + int gts = call_multiallelic_gt(b,p1,vc.min_ma_lrt); + if ( gts<=1 && vc.flag & VC_VARONLY ) continue; + } + else if (vc.flag & VC_CALL) { // call variants bcf_p1rst_t pr; - int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); + int calret; + gzwrite(bcf_p1_fp_lk, &b->tid, 4); + gzwrite(bcf_p1_fp_lk, &b->pos, 4); + gzwrite(bcf_p1_fp_lk, &em[0], sizeof(double)); + calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); if (n_processed % 100000 == 0) { fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); bcf_p1_dump_afs(p1); @@ -567,6 +579,8 @@ int bcfview(int argc, char *argv[]) } else bcf_fix_gt(b); vcf_write(bout, hout, b); } + + if (bcf_p1_fp_lk) gzclose(bcf_p1_fp_lk); if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); if (hin != hout) bcf_hdr_destroy(hout);