X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fcall1.c;h=dda4164d111a55747d85f74fd814c90a4bab11d4;hb=3f820f8c565a8162d835a321a5c0ecdd9567d820;hp=415426abcd1511f1e2cadd987501efcbf21bcb74;hpb=74e6aebb5c07d14cdccc5f8abb40fd4a00b2be86;p=samtools.git diff --git a/bcftools/call1.c b/bcftools/call1.c index 415426a..dda4164 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -48,11 +48,6 @@ void *bed_read(const char *fn); void bed_destroy(void *_h); int bed_overlap(const void *_h, const char *chr, int beg, int end); -typedef struct { - double p[4]; - int mq, depth, is_tested, d[4]; -} anno16_t; - static double ttest(int n1, int n2, int a[4]) { extern double kf_betai(double a, double b, double x); @@ -83,7 +78,7 @@ static int test16_core(int anno[16], anno16_t *a) return 0; } -static int test16(bcf1_t *b, anno16_t *a) +int test16(bcf1_t *b, anno16_t *a) { char *p; int i, anno[16]; @@ -100,17 +95,6 @@ static int test16(bcf1_t *b, anno16_t *a) return test16_core(anno, a); } -static void rm_info(bcf1_t *b, const char *key) -{ - char *p, *q; - if ((p = strstr(b->info, key)) == 0) return; - for (q = p; *q && *q != ';'; ++q); - if (p > b->info && *(p-1) == ';') --p; - memmove(p, q, b->l_str - (q - b->str)); - b->l_str -= q - p; - bcf_sync(b); -} - static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[10], int cons_llr, int64_t cons_gt) { kstring_t s; @@ -119,7 +103,7 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, anno16_t a; has_I16 = test16(b, &a) >= 0? 1 : 0; - rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! + //rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed! memset(&s, 0, sizeof(kstring_t)); kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s); @@ -170,6 +154,8 @@ static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, } if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]); kputc('\0', &s); + rm_info(&s, "QS="); + rm_info(&s, "I16="); kputs(b->fmt, &s); kputc('\0', &s); free(b->str); b->m_str = s.m; b->l_str = s.l; b->str = s.s; @@ -312,6 +298,9 @@ int bcfview(int argc, char *argv[]) extern int bcf_trio_call(uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt); extern int bcf_pair_call(const bcf1_t *b); extern int bcf_min_diff(const bcf1_t *b); + extern int bcf_p1_get_M(bcf_p1aux_t *b); + + extern gzFile bcf_p1_fp_lk; bcf_t *bp, *bout = 0; bcf1_t *b, *blast; @@ -327,10 +316,10 @@ int bcfview(int argc, char *argv[]) memset(&vc, 0, sizeof(viewconf_t)); vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; vc.min_ma_lrt = -1; memset(qcnt, 0, 8 * 256); - while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:")) >= 0) { + while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:K:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; - case 'l': vc.bed = bed_read(optarg); break; + case 'l': vc.bed = bed_read(optarg); if (!vc.bed) { fprintf(stderr,"Could not read \"%s\"\n", optarg); return 1; } break; case 'D': vc.fn_dict = strdup(optarg); break; case 'F': vc.flag |= VC_FIX_PL; break; case 'N': vc.flag |= VC_ACGT_ONLY; break; @@ -357,6 +346,7 @@ int bcfview(int argc, char *argv[]) case 'C': vc.min_lrt = atof(optarg); break; case 'X': vc.min_perm_p = atof(optarg); break; case 'd': vc.min_smpl_frac = atof(optarg); break; + case 'K': bcf_p1_fp_lk = gzopen(optarg, "w"); break; case 's': vc.subsam = read_samples(optarg, &vc.n_sub); vc.ploidy = calloc(vc.n_sub + 1, 1); for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1]; @@ -446,7 +436,7 @@ int bcfview(int argc, char *argv[]) vc.sublist = calloc(vc.n_sub, sizeof(int)); hout = bcf_hdr_subsam(hin, vc.n_sub, vc.subsam, vc.sublist); } - if (vc.flag & VC_CALL) write_header(hout); + write_header(hout); // always print the header vcf_hdr_write(bout, hout); } if (vc.flag & VC_CALL) { @@ -480,6 +470,10 @@ int bcfview(int argc, char *argv[]) } } } + if (bcf_p1_fp_lk && p1) { + int32_t M = bcf_p1_get_M(p1); + gzwrite(bcf_p1_fp_lk, &M, 4); + } while (vcf_read(bp, hin, b) > 0) { int is_indel, cons_llr = -1; int64_t cons_gt = -1; @@ -528,15 +522,19 @@ int bcfview(int argc, char *argv[]) int i; for (i = 0; i < 9; ++i) em[i] = -1.; } - bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions - if ( !(vc.flag&VC_KEEPALT) && vc.flag&VC_CALL && vc.min_ma_lrt>=0 ) + if ( !(vc.flag&VC_KEEPALT) && (vc.flag&VC_CALL) && vc.min_ma_lrt>=0 ) { + bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions int gts = call_multiallelic_gt(b,p1,vc.min_ma_lrt); if ( gts<=1 && vc.flag & VC_VARONLY ) continue; } else if (vc.flag & VC_CALL) { // call variants bcf_p1rst_t pr; - int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); + int calret; + gzwrite(bcf_p1_fp_lk, &b->tid, 4); + gzwrite(bcf_p1_fp_lk, &b->pos, 4); + gzwrite(bcf_p1_fp_lk, &em[0], sizeof(double)); + calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); if (n_processed % 100000 == 0) { fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); bcf_p1_dump_afs(p1); @@ -581,6 +579,8 @@ int bcfview(int argc, char *argv[]) } else bcf_fix_gt(b); vcf_write(bout, hout, b); } + + if (bcf_p1_fp_lk) gzclose(bcf_p1_fp_lk); if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); if (hin != hout) bcf_hdr_destroy(hout);