X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_mcns.c;h=cd64b0ac139814706632a05d2e7b75d619a36b8a;hb=d73a1b3b94255044787c0e4bfeba37df66e99770;hp=b69ec0f0abaceb6aa0408cb61a9adc51b0626765;hpb=7795f54fa8aed1e0b1edff61890e9fe25fdb7fe9;p=samtools.git diff --git a/bam_mcns.c b/bam_mcns.c index b69ec0f..cd64b0a 100644 --- a/bam_mcns.c +++ b/bam_mcns.c @@ -9,10 +9,22 @@ struct __mc_aux_t { int n, N; int ref, alt; + double theta; double *q2p, *pdg; // pdg -> P(D|g) + double *alpha, *beta; int *qsum, *bcnt, rcnt[4]; }; +void mc_init_prior(mc_aux_t *ma, double theta) +{ + double sum; + int i; + ma->theta = theta; + for (i = 0, sum = 0.; i < 2 * ma->n; ++i) + sum += (ma->alpha[i] = ma->theta / (2 * ma->n - i)); + ma->alpha[2 * ma->n] = 1. - sum; +} + mc_aux_t *mc_init(int n) // FIXME: assuming diploid { mc_aux_t *ma; @@ -23,8 +35,18 @@ mc_aux_t *mc_init(int n) // FIXME: assuming diploid ma->qsum = calloc(4 * ma->n, sizeof(int)); ma->bcnt = calloc(4 * ma->n, sizeof(int)); ma->pdg = calloc(3 * ma->n, sizeof(double)); + ma->alpha = calloc(2 * ma->n + 1, sizeof(double)); + ma->beta = calloc((2 * ma->n + 1) * 3, sizeof(double)); for (i = 0; i <= MC_MAX_SUMQ; ++i) ma->q2p[i] = pow(10., -i / 10.); + for (i = 0; i <= 2 * ma->n; ++i) { + double *bi = ma->beta + 3 * i; + double f = (double)i / (2 * ma->n); + bi[0] = (1. - f) * (1. - f); + bi[1] = 2 * f * (1. - f); + bi[2] = f * f; + } + mc_init_prior(ma, 1e-3); // the simplest prior return ma; } @@ -33,6 +55,7 @@ void mc_destroy(mc_aux_t *ma) if (ma) { free(ma->qsum); free(ma->bcnt); free(ma->q2p); free(ma->pdg); + free(ma->alpha); free(ma->beta); free(ma); } } @@ -101,7 +124,7 @@ static void cal_pdg(mc_aux_t *ma) pdg[i] = pi[i] > MC_MAX_SUMQ? MC_MAX_SUMQP : ma->q2p[pi[i]]; } } - +// return the reference allele frequency double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ref, int *alt) { int i, acnt[4], j; @@ -120,7 +143,7 @@ double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ *_ref = ma->ref; *alt = ma->alt; return ma->n == 1 || fr < 0.? f0 : fr; } - +// f0 is the reference allele frequency double mc_freq_iter(double f0, mc_aux_t *ma) { double f, f3[3]; @@ -138,3 +161,46 @@ double mc_freq_iter(double f0, mc_aux_t *ma) f /= ma->n * 2.; return f; } + +double mc_ref_prob(mc_aux_t *ma) +{ + int k, i, g; + long double PD = 0., Pref = 0.; + for (k = 0; k <= ma->n * 2; ++k) { + long double x = 1.; + double *bk = ma->beta + k * 3; + for (i = 0; i < ma->n; ++i) { + double y = 0., *pdg = ma->pdg + i * 3; + for (g = 0; g < 3; ++g) + y += pdg[g] * bk[g]; + x *= y; + } + PD += x * ma->alpha[k]; + } + for (k = 0; k <= ma->n * 2; ++k) { + long double x = 1.0; + for (i = 0; i < ma->n; ++i) + x *= ma->pdg[i * 3 + 2] * ma->beta[k * 3 + 2]; + Pref += x * ma->alpha[k]; + } + return Pref / PD; +} + +int mc_call_gt(const mc_aux_t *ma, double f0, int k) +{ + double sum, g[3]; + double max, f3[3], *pdg = ma->pdg + k * 3; + int q, i, max_i; + f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0; + for (i = 0, sum = 0.; i < 3; ++i) + sum += (g[i] = pdg[i] * f3[i]); + for (i = 0, max = -1., max_i = 0; i < 3; ++i) { + g[i] /= sum; + if (g[i] > max) max = g[i], max_i = i; + } + max = 1. - max; + if (max < 1e-308) max = 1e-308; + q = (int)(-3.434 * log(max) + .499); + if (q > 99) q = 99; + return q<<2|max_i; +}