X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fcall1.c;h=bc1cd407e7967290051b4ffbf4c73938468a43dd;hb=496174d0b1c432375fd71b46ebdbecff78ef3f1a;hp=69bd5f7700698dfe39207695328220a4d8eba248;hpb=5329dbb6df3eee6a46d875b9d50e7f1a869f6b25;p=samtools.git diff --git a/bcftools/call1.c b/bcftools/call1.c index 69bd5f7..bc1cd40 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -33,13 +33,14 @@ KSTREAM_INIT(gzFile, gzread, 16384) #define VC_EM 0x10000 #define VC_PAIRCALL 0x20000 #define VC_QCNT 0x40000 +#define VC_INDEL_ONLY 0x80000 typedef struct { int flag, prior_type, n1, n_sub, *sublist, n_perm; uint32_t *trio_aux; char *prior_file, **subsam, *fn_dict; uint8_t *ploidy; - double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt; + double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt, min_ma_lrt; void *bed; } viewconf_t; @@ -47,11 +48,6 @@ void *bed_read(const char *fn); void bed_destroy(void *_h); int bed_overlap(const void *_h, const char *chr, int beg, int end); -typedef struct { - double p[4]; - int mq, depth, is_tested, d[4]; -} anno16_t; - static double ttest(int n1, int n2, int a[4]) { extern double kf_betai(double a, double b, double x); @@ -82,7 +78,7 @@ static int test16_core(int anno[16], anno16_t *a) return 0; } -static int test16(bcf1_t *b, anno16_t *a) +int test16(bcf1_t *b, anno16_t *a) { char *p; int i, anno[16]; @@ -99,17 +95,6 @@ static int test16(bcf1_t *b, anno16_t *a) return test16_core(anno, a); } -static void rm_info(bcf1_t *b, const char *key) -{ - char *p, *q; - if ((p = strstr(b->info, key)) == 0) return; - for (q = p; *q && *q != ';'; ++q); - if (p > b->info && *(p-1) == ';') --p; - memmove(p, q, b->l_str - (q - b->str)); - b->l_str -= q - p; - bcf_sync(b); -} - static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[10], int cons_llr, int64_t cons_gt) { kstring_t s; @@ -118,7 +103,7 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, anno16_t a; has_I16 = test16(b, &a) >= 0? 1 : 0; - rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! + //rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); @@ -131,7 +116,6 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, if (em[5] >= 0 && em[6] >= 0) ksprintf(&s, ";AF2=%.4g,%.4g", 1 - em[5], 1 - em[6]); if (em[7] >= 0) ksprintf(&s, ";LRT=%.3g", em[7]); if (em[8] >= 0) ksprintf(&s, ";LRT2=%.3g", em[8]); - //if (em[9] >= 0) ksprintf(&s, ";LRT1=%.3g", em[9]); } if (cons_llr > 0) { ksprintf(&s, ";CLR=%d", cons_llr); @@ -170,6 +154,8 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, } if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]); kputc('\0', &s); + rm_info(&s, "QS="); + rm_info(&s, "I16="); kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; @@ -250,6 +236,12 @@ static void write_header(bcf_hdr_t *h) kputs("##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##INFO=\n", &str); + if (!strstr(str.s, "##INFO=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); + if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); if (!strstr(str.s, "##FORMAT=\n", &str); + kputs("##FORMAT=\n", &str); h->l_txt = str.l + 1; h->txt = str.s; } @@ -302,6 +298,9 @@ int bcfview(int argc, char *argv[]) extern int bcf_trio_call(uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt); extern int bcf_pair_call(const bcf1_t *b); extern int bcf_min_diff(const bcf1_t *b); + extern int bcf_p1_get_M(bcf_p1aux_t *b); + + extern gzFile bcf_p1_fp_lk; bcf_t *bp, *bout = 0; bcf1_t *b, *blast; @@ -315,12 +314,12 @@ int bcfview(int argc, char *argv[]) tid = begin = end = -1; memset(&vc, 0, sizeof(viewconf_t)); - vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; + vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; vc.min_ma_lrt = -1; memset(qcnt, 0, 8 * 256); - while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Y")) >= 0) { + while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:K:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; - case 'l': vc.bed = bed_read(optarg); break; + case 'l': vc.bed = bed_read(optarg); if (!vc.bed) fprintf(stderr,"Could not read \"%s\"\n", optarg); return 1; break; case 'D': vc.fn_dict = strdup(optarg); break; case 'F': vc.flag |= VC_FIX_PL; break; case 'N': vc.flag |= VC_ACGT_ONLY; break; @@ -334,8 +333,10 @@ int bcfview(int argc, char *argv[]) case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break; case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break; case 'I': vc.flag |= VC_NO_INDEL; break; + case 'w': vc.flag |= VC_INDEL_ONLY; break; case 'M': vc.flag |= VC_ANNO_MAX; break; case 'Y': vc.flag |= VC_QCNT; break; + case 'm': vc.min_ma_lrt = atof(optarg); break; case 't': vc.theta = atof(optarg); break; case 'p': vc.pref = atof(optarg); break; case 'i': vc.indel_frac = atof(optarg); break; @@ -345,6 +346,7 @@ int bcfview(int argc, char *argv[]) case 'C': vc.min_lrt = atof(optarg); break; case 'X': vc.min_perm_p = atof(optarg); break; case 'd': vc.min_smpl_frac = atof(optarg); break; + case 'K': bcf_p1_fp_lk = gzopen(optarg, "w"); break; case 's': vc.subsam = read_samples(optarg, &vc.n_sub); vc.ploidy = calloc(vc.n_sub + 1, 1); for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1]; @@ -391,6 +393,7 @@ int bcfview(int argc, char *argv[]) fprintf(stderr, " -g call genotypes at variant sites (force -c)\n"); fprintf(stderr, " -i FLOAT indel-to-substitution ratio [%.4g]\n", vc.indel_frac); fprintf(stderr, " -I skip indels\n"); + fprintf(stderr, " -m FLOAT alternative model for multiallelic and rare-variant calling, include if P(chi^2)>=FLOAT\n"); fprintf(stderr, " -p FLOAT variant if P(ref|D) 0) { int is_indel, cons_llr = -1; int64_t cons_gt = -1; @@ -481,6 +488,7 @@ int bcfview(int argc, char *argv[]) if (vc.flag & VC_FIX_PL) bcf_fix_pl(b); is_indel = bcf_is_indel(b); if ((vc.flag & VC_NO_INDEL) && is_indel) continue; + if ((vc.flag & VC_INDEL_ONLY) && !is_indel) continue; if ((vc.flag & VC_ACGT_ONLY) && !is_indel) { int x; if (b->ref[0] == 0 || b->ref[1] != 0) continue; @@ -495,7 +503,7 @@ int bcfview(int argc, char *argv[]) if (!(l > begin && end > b->pos)) continue; } ++n_processed; - if (vc.flag & VC_QCNT) { // summarize the difference + if ((vc.flag & VC_QCNT) && !is_indel) { // summarize the difference int x = bcf_min_diff(b); if (x > 255) x = 255; if (x >= 0) ++qcnt[x]; @@ -514,9 +522,19 @@ int bcfview(int argc, char *argv[]) int i; for (i = 0; i < 9; ++i) em[i] = -1.; } - if (vc.flag & VC_CALL) { // call variants + if ( !(vc.flag&VC_KEEPALT) && (vc.flag&VC_CALL) && vc.min_ma_lrt>=0 ) + { + bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions + int gts = call_multiallelic_gt(b,p1,vc.min_ma_lrt); + if ( gts<=1 && vc.flag & VC_VARONLY ) continue; + } + else if (vc.flag & VC_CALL) { // call variants bcf_p1rst_t pr; - int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); + int calret; + gzwrite(bcf_p1_fp_lk, &b->tid, 4); + gzwrite(bcf_p1_fp_lk, &b->pos, 4); + gzwrite(bcf_p1_fp_lk, &em[0], sizeof(double)); + calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); if (n_processed % 100000 == 0) { fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); bcf_p1_dump_afs(p1); @@ -561,6 +579,8 @@ int bcfview(int argc, char *argv[]) } else bcf_fix_gt(b); vcf_write(bout, hout, b); } + + if (bcf_p1_fp_lk) gzclose(bcf_p1_fp_lk); if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); if (hin != hout) bcf_hdr_destroy(hout);