X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fprob1.c;h=4611af1f995fff8832300af10f733bffa41dd29d;hb=b6f550edaa9384857879894bf01399fad76dac37;hp=1acd49b48295d4d315f52c2f3969bd6c3b66e36f;hpb=b65b44f2a48848bfcc1c1f2e738251819bcbb518;p=samtools.git diff --git a/bcftools/prob1.c b/bcftools/prob1.c index 1acd49b..4611af1 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -2,8 +2,12 @@ #include #include #include +#include #include "prob1.h" +#include "kseq.h" +KSTREAM_INIT(gzFile, gzread, 16384) + #define MC_AVG_ERR 0.007 #define MC_MAX_EM_ITER 16 #define MC_EM_EPS 1e-4 @@ -54,6 +58,43 @@ void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta) } } +int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn) +{ + gzFile fp; + kstring_t s; + kstream_t *ks; + long double sum; + int dret, k; + memset(&s, 0, sizeof(kstring_t)); + fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r"); + ks = ks_init(fp); + memset(ma->phi, 0, sizeof(double) * (ma->M + 1)); + while (ks_getuntil(ks, '\n', &s, &dret) >= 0) { + if (strstr(s.s, "[afs] ") == s.s) { + char *p = s.s + 6; + for (k = 0; k <= ma->M; ++k) { + int x; + double y; + x = strtol(p, &p, 10); + if (x != k && (errno == EINVAL || errno == ERANGE)) return -1; + ++p; + y = strtod(p, &p); + if (y == 0. && (errno == EINVAL || errno == ERANGE)) return -1; + ma->phi[ma->M - k] += y; + } + } + } + ks_destroy(ks); + gzclose(fp); + free(s.s); + for (sum = 0., k = 0; k <= ma->M; ++k) sum += ma->phi[k]; + fprintf(stderr, "[prior]"); + for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum; + for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]); + fputc('\n', stderr); + return 0; +} + bcf_p1aux_t *bcf_p1_init(int n) // FIXME: assuming diploid { bcf_p1aux_t *ma; @@ -148,20 +189,22 @@ int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k) static void mc_cal_y(bcf_p1aux_t *ma) { - double *z[2], *tmp, *pdg, last_min, last_max; - int k, j; + double *z[2], *tmp, *pdg; + int k, j, last_min, last_max; z[0] = ma->z; z[1] = ma->zswap; pdg = ma->pdg; - z[0][0] = 1.; z[0][1] = z[0][2] = 0.; + memset(z[0], 0, sizeof(double) * (ma->M + 1)); + memset(z[1], 0, sizeof(double) * (ma->M + 1)); + z[0][0] = 1.; last_min = last_max = 0; for (j = 0; j < ma->n; ++j) { int _min = last_min, _max = last_max; double p[3], sum; pdg = ma->pdg + j * 3; p[0] = pdg[0]; p[1] = 2. * pdg[1]; p[2] = pdg[2]; -// for (; _min < _max && z[0][_min] < TINY; ++_min) z[1][_min] = 0.; -// for (; _max > _min && z[0][_max] < TINY; --_max) z[1][_max] = 0.; + for (; _min < _max && z[0][_min] < TINY; ++_min) z[0][_min] = z[1][_min] = 0.; + for (; _max > _min && z[0][_max] < TINY; --_max) z[0][_max] = z[1][_max] = 0.; _max += 2; if (_min == 0) k = 0, z[1][k] = (2*j+2-k)*(2*j-k+1) * p[0] * z[0][k]; @@ -173,6 +216,8 @@ static void mc_cal_y(bcf_p1aux_t *ma) + k*(k-1)* p[2] * z[0][k-2]; for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k]; for (k = _min; k <= _max; ++k) z[1][k] /= sum; + if (_min >= 1) z[1][_min-1] = 0.; + if (_min >= 2) z[1][_min-2] = 0.; if (j < ma->n - 1) z[1][_max+1] = z[1][_max+2] = 0.; tmp = z[0]; z[0] = z[1]; z[1] = tmp; last_min = _min; last_max = _max;