struct __mc_aux_t {
int n, N;
int ref, alt;
- double theta;
double *q2p, *pdg; // pdg -> P(D|g)
double *alpha, *beta;
- int *qsum, *bcnt, rcnt[4];
+ int *qsum, *bcnt;
};
-void mc_init_prior(mc_aux_t *ma, double theta)
+void mc_init_prior(mc_aux_t *ma, int type, double theta)
{
- double sum;
int i;
- ma->theta = theta;
- for (i = 0, sum = 0.; i < 2 * ma->n; ++i)
- sum += (ma->alpha[i] = ma->theta / (2 * ma->n - i));
- ma->alpha[2 * ma->n] = 1. - sum;
+ if (type == MC_PTYPE_COND2) {
+ for (i = 0; i <= 2 * ma->n; ++i)
+ ma->alpha[i] = 2. * (i + 1) / (2 * ma->n + 1) / (2 * ma->n + 2);
+ } else {
+ double sum;
+ for (i = 0, sum = 0.; i < 2 * ma->n; ++i)
+ sum += (ma->alpha[i] = theta / (2 * ma->n - i));
+ ma->alpha[2 * ma->n] = 1. - sum;
+ }
}
mc_aux_t *mc_init(int n) // FIXME: assuming diploid
ma->beta = calloc((2 * ma->n + 1) * 3, sizeof(double));
for (i = 0; i <= MC_MAX_SUMQ; ++i)
ma->q2p[i] = pow(10., -i / 10.);
- for (i = 0; i <= 2 * ma->n; ++i) {
+ for (i = 0; i <= 2 * ma->n; ++i) { // beta[k][g]=P(g|k/M)
double *bi = ma->beta + 3 * i;
double f = (double)i / (2 * ma->n);
bi[0] = (1. - f) * (1. - f);
bi[1] = 2 * f * (1. - f);
bi[2] = f * f;
}
- mc_init_prior(ma, 1e-3); // the simplest prior
+ mc_init_prior(ma, MC_PTYPE_FULL, 1e-3); // the simplest prior
return ma;
}
int i, j;
memset(ma->qsum, 0, sizeof(int) * 4 * ma->n);
memset(ma->bcnt, 0, sizeof(int) * 4 * ma->n);
- memset(ma->rcnt, 0, sizeof(int) * 4);
for (j = 0; j < ma->n; ++j) {
int tmp, *qsum = ma->qsum + j * 4;
int *bcnt = ma->bcnt + j * 4;
- double r = drand48(), rr;
for (i = tmp = 0; i < n[j]; ++i) {
const bam_pileup1_t *p = plp[j] + i;
int q, b;
++bcnt[b];
++tmp;
}
- if (tmp) {
- for (i = 0, rr = 0.; i < 4; ++i) {
- rr += (double)bcnt[i] / tmp;
- if (rr >= r) break;
- }
- if (i < 4) ++ma->rcnt[i];
- }
}
}
// return the reference allele frequency
double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ref, int *alt)
{
- int i, acnt[4], j;
- double fr = -1., f0 = -1.;
+ int i, cnt;
+ double f;
sum_err(n, plp, ma);
set_allele(ref, ma);
cal_pdg(ma);
- acnt[0] = acnt[1] = acnt[2] = acnt[3] = 0;
- for (i = 0; i < ma->n; ++i)
- for (j = 0; j < 4; ++j)
- acnt[j] += ma->bcnt[i * 4 + j];
- if (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]) // never used...
- fr = (double)ma->rcnt[ref] / (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]);
- if (acnt[ma->ref] + acnt[ma->alt])
- f0 = (double)acnt[ref] / (acnt[ma->ref] + acnt[ma->alt]);
*_ref = ma->ref; *alt = ma->alt;
- return ma->n == 1 || fr < 0.? f0 : fr;
+ for (i = cnt = 0, f = 0.; i < ma->n; ++i) {
+ int *bcnt = ma->bcnt + i * 4;
+ int x = bcnt[ma->ref] + bcnt[ma->alt];
+ if (x) {
+ ++cnt;
+ f += (double)bcnt[ma->ref] / x;
+ }
+ }
+ return f / cnt;
}
// f0 is the reference allele frequency
double mc_freq_iter(double f0, mc_aux_t *ma)
return f;
}
+double mc_freq_post(mc_aux_t *ma)
+{
+ int i, k;
+ long double f = 0.;
+ for (i = 0; i < ma->n; ++i) {
+ double *pdg = ma->pdg + i * 3;
+ long double y = 0., z = 0.;
+ for (k = 0; k <= ma->n * 2; ++k) {
+ double *bk = ma->beta + k * 3;
+/*
+ int g;
+ double yk = 0., zk = 0.;
+ for (g = 0; g < 3; ++g) {
+ yk += g * pdg[g] * bk[g];
+ zk += pdg[g] * bk[g];
+ }
+ y += yk * ma->alpha[k];
+ z += zk * ma->alpha[k];
+*/
+ y += (pdg[1] * bk[1] + 2. * pdg[2] * bk[2]) * ma->alpha[k];
+ z += (pdg[0] * bk[0] + pdg[1] * bk[1] + pdg[2] * bk[2]) * ma->alpha[k];
+ }
+ f += y / z;
+ }
+ return f / ma->n / 2;
+}
+
double mc_ref_prob(mc_aux_t *ma)
{
- int k, i, g;
+ int k, i;
long double PD = 0., Pref = 0.;
for (k = 0; k <= ma->n * 2; ++k) {
long double x = 1.;
double *bk = ma->beta + k * 3;
for (i = 0; i < ma->n; ++i) {
- double y = 0., *pdg = ma->pdg + i * 3;
- for (g = 0; g < 3; ++g)
- y += pdg[g] * bk[g];
- x *= y;
+ double *pdg = ma->pdg + i * 3;
+// int g; double y=0.; for (g = 0; g < 3; ++g) y += pdg[g] * bk[g];
+ x *= pdg[0] * bk[0] + pdg[1] * bk[1] + pdg[2] * bk[2];
}
PD += x * ma->alpha[k];
}
g[i] /= sum;
if (g[i] > max) max = g[i], max_i = i;
}
+// printf("***%lg,%lg,%lg,%lg,%lg,%lg\n", g[0], g[1], g[2], pdg[0], pdg[1], pdg[2]);
max = 1. - max;
if (max < 1e-308) max = 1e-308;
q = (int)(-3.434 * log(max) + .499);