From: Heng Li Date: Wed, 18 Aug 2010 13:55:20 +0000 (+0000) Subject: allow to read the prior from the error output. EM iteration is working. X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=b6f550edaa9384857879894bf01399fad76dac37 allow to read the prior from the error output. EM iteration is working. --- diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 4afda56..4611af1 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -2,8 +2,12 @@ #include #include #include +#include #include "prob1.h" +#include "kseq.h" +KSTREAM_INIT(gzFile, gzread, 16384) + #define MC_AVG_ERR 0.007 #define MC_MAX_EM_ITER 16 #define MC_EM_EPS 1e-4 @@ -54,6 +58,43 @@ void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta) } } +int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn) +{ + gzFile fp; + kstring_t s; + kstream_t *ks; + long double sum; + int dret, k; + memset(&s, 0, sizeof(kstring_t)); + fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); + ks = ks_init(fp); + memset(ma->phi, 0, sizeof(double) * (ma->M + 1)); + while (ks_getuntil(ks, '\n', &s, &dret) >= 0) { + if (strstr(s.s, "[afs] ") == s.s) { + char *p = s.s + 6; + for (k = 0; k <= ma->M; ++k) { + int x; + double y; + x = strtol(p, &p, 10); + if (x != k && (errno == EINVAL || errno == ERANGE)) return -1; + ++p; + y = strtod(p, &p); + if (y == 0. && (errno == EINVAL || errno == ERANGE)) return -1; + ma->phi[ma->M - k] += y; + } + } + } + ks_destroy(ks); + gzclose(fp); + free(s.s); + for (sum = 0., k = 0; k <= ma->M; ++k) sum += ma->phi[k]; + fprintf(stderr, "[prior]"); + for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum; + for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]); + fputc('\n', stderr); + return 0; +} + bcf_p1aux_t *bcf_p1_init(int n) // FIXME: assuming diploid { bcf_p1aux_t *ma; diff --git a/bcftools/prob1.h b/bcftools/prob1.h index 96beb04..358f551 100644 --- a/bcftools/prob1.h +++ b/bcftools/prob1.h @@ -25,6 +25,7 @@ extern "C" { int bcf_p1_cal(bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst); int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k); void bcf_p1_dump_afs(bcf_p1aux_t *ma); + int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn); #ifdef __cplusplus } diff --git a/bcftools/vcfout.c b/bcftools/vcfout.c index fd817f9..f54fde5 100644 --- a/bcftools/vcfout.c +++ b/bcftools/vcfout.c @@ -23,7 +23,7 @@ KSTREAM_INIT(gzFile, gzread, 16384) typedef struct { int flag, prior_type; - char *fn_list; + char *fn_list, *prior_file; double theta, pref; } viewconf_t; @@ -124,10 +124,8 @@ static int update_bcf1(int n_smpl, bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p } kputc('\0', &s); kputc('\0', &s); kputs(b->info, &s); - if (p_dp >= 0. && d[2] + d[3] > 0) { // has at least one non-reference base - if (b->info[0]) kputc(';', &s); - ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp); - } + if (b->info[0]) kputc(';', &s); + ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp); if (p_hwe <= .2) ksprintf(&s, ";GC=%.2lf,%.2lf,%.2lf;HWE=%.3lf", pr->g[2], pr->g[1], pr->g[0], p_hwe); if (p_dp >= 0. && p_dp <= .2) ksprintf(&s, ";TDP=%.3lf", p_dp); if (p_ed >= 0. && p_ed <= .2) ksprintf(&s, ";TED=%.3lf", p_ed); @@ -173,6 +171,7 @@ int bcfview(int argc, char *argv[]) if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL; else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2; else if (strcmp(optarg, "flat") == 0) vc.prior_type = MC_PTYPE_FLAT; + else vc.prior_file = strdup(optarg); break; } } @@ -206,7 +205,12 @@ int bcfview(int argc, char *argv[]) vcf_hdr_write(bout, h); if (vc.flag & VC_CALL) { p1 = bcf_p1_init(h->n_smpl); - bcf_p1_init_prior(p1, vc.prior_type, vc.theta); + if (vc.prior_file) { + if (bcf_p1_read_prior(p1, vc.prior_file) < 0) { + fprintf(stderr, "[%s] fail to read the prior AFS.\n", __func__); + return 1; + } + } else bcf_p1_init_prior(p1, vc.prior_type, vc.theta); } if (vc.fn_list) hash = bcf_load_pos(vc.fn_list, h); if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) { @@ -248,6 +252,7 @@ int bcfview(int argc, char *argv[]) vcf_write(bout, h, b); ++n_processed; } + if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); bcf_hdr_destroy(h); bcf_destroy(b);