#include "bam2bcf.h"
#include "bcftools/bcf.h"
-#define MAX_END_DIST 30
+#define END_DIST_THRES 11
extern void ks_introsort_uint32_t(size_t n, uint32_t a[]);
// calculate min_dist
min_dist = p->b->core.l_qseq - 1 - p->qpos;
if (min_dist > p->qpos) min_dist = p->qpos;
- if (min_dist > MAX_END_DIST) min_dist = MAX_END_DIST;
- k >>= 1;
- r->dsum[k] += min_dist;
- r->d2sum[k] += min_dist * min_dist;
+ k = (k&2) | (min_dist <= END_DIST_THRES);
+ ++r->ed[k];
}
ks_introsort_uint32_t(n, bca->info);
r->depth = n;
call->shift = (int)(sum_min + .499);
}
memset(call->d, 0, 4 * sizeof(int));
- call->davg[0] = call->davg[1] = call->dstd[0] = call->dstd[1] = 0.;
+ memset(call->ed, 0, 4 * sizeof(int));
for (i = call->depth = 0, tmp = 0; i < n; ++i) {
call->depth += calls[i].depth;
- for (j = 0; j < 4; ++j) call->d[j] += calls[i].d[j];
- for (j = 0; j < 2; ++j) {
- call->davg[j] += calls[i].dsum[j];
- call->dstd[j] += calls[i].d2sum[j];
- }
+ for (j = 0; j < 4; ++j) call->d[j] += calls[i].d[j], call->ed[j] += calls[i].ed[j];
tmp += calls[i].sum_Q2;
}
- for (j = 0; j < 2; ++j) {
- int x = call->d[j*2] + call->d[j*2+1];
- if (x == 0) {
- call->davg[j] = call->dstd[j] = 0.;
- } else {
- call->davg[j] /= x;
- call->dstd[j] = sqrt(call->dstd[j] / x - call->davg[j] * call->davg[j]);
- }
- }
call->rmsQ = (int)(sqrt((double)tmp / call->depth) + .499);
return 0;
}
if (i) kputc(',', &s);
kputw(bc->d[i], &s);
}
- if (bc->d[2] + bc->d[3] > 1)
- ksprintf(&s, ";MED=%.1lf,%.1lf,%.1lf,%.1lf", bc->davg[0], bc->dstd[0], bc->davg[1], bc->dstd[1]);
+ kputs(";ED4=", &s);
+ for (i = 0; i < 4; ++i) {
+ if (i) kputc(',', &s);
+ kputw(bc->ed[i], &s);
+ }
kputc('\0', &s);
// FMT
kputs("PL", &s); kputc('\0', &s);
typedef struct __bcf_callaux_t bcf_callaux_t;
typedef struct {
- int depth, d[4];
- int dsum[2], d2sum[2];
+ int depth, d[4], ed[4];
uint64_t sum_Q2;
float p[25], esum[5];
} bcf_callret1_t;
typedef struct {
int a[5]; // alleles: ref, alt, alt2, alt3
int n, n_alleles, shift, ori_ref, unseen;
- int d[4], depth, rmsQ;
- double davg[2], dstd[2];
+ int d[4], ed[4], depth, rmsQ;
uint8_t *PL;
} bcf_call_t;
#include <math.h>
#include <stdlib.h>
+/* This program is implemented with ideas from this web page:
+ *
+ * http://www.langsrud.com/fisher.htm
+ */
+
// log\binom{n}{k}
static double lbinom(int n, int k)
{
aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
} else { // then only n11 changed; the rest fixed
if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
- if (n11 == aux->n11 + 1) {
+ if (n11 == aux->n11 + 1) { // incremental
aux->p *= (double)(aux->n1_ - aux->n11) / n11
* (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
aux->n11 = n11;
return aux->p;
}
- if (n11 == aux->n11 - 1) {
+ if (n11 == aux->n11 - 1) { // incremental
aux->p *= (double)aux->n11 / (aux->n1_ - n11)
* (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
aux->n11 = n11;
int n1_, n_1, n;
n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
- max = (n_1 < n1_) ? n_1 : n1_; // for right tail
- min = (n1_ + n_1 - n < 0) ? 0 : (n1_ + n_1 - n < 0); // for left tail
- if (min == max) { // no need to do test
- *two = *_left = *_right = 1.;
- return 1.;
- }
- // the probability of the current table
- q = hypergeo_acc(n11, n1_, n_1, n, &aux);
+ max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
+ min = (n1_ + n_1 - n < 0) ? 0 : (n1_ + n_1 - n < 0); // min n11, for left tail
+ *two = *_left = *_right = 1.;
+ if (min == max) return 1.; // no need to do test
+ q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
// left tail
p = hypergeo_acc(min, 0, 0, 0, &aux);
- for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) {
- left += p;
- p = hypergeo_acc(i, 0, 0, 0, &aux);
- }
+ for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) // loop until underflow
+ left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
--i;
if (p < 1.00000001 * q) left += p;
else --i;
// right tail
p = hypergeo_acc(max, 0, 0, 0, &aux);
- for (right = 0., j = max - 1; p < 0.99999999 * q; --j) {
- right += p;
- p = hypergeo_acc(j, 0, 0, 0, &aux);
- }
+ for (right = 0., j = max - 1; p < 0.99999999 * q; --j) // loop until underflow
+ right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
if (p < 1.00000001 * q) right += p;
else ++j;
// two-tail
return sum / ma->M;
}
+static long double p1_cal_g3(bcf_p1aux_t *p1a, double g[3])
+{
+ long double pd = 0., g2[3];
+ int i, k;
+ memset(g2, 0, sizeof(long double) * 3);
+ for (k = 0; k < p1a->M; ++k) {
+ double f = (double)k / p1a->M, f3[3], g1[3];
+ long double z = 1.;
+ g1[0] = g1[1] = g1[2] = 0.;
+ f3[0] = (1. - f) * (1. - f); f3[1] = 2. * f * (1. - f); f3[2] = f * f;
+ for (i = 0; i < p1a->n; ++i) {
+ double *pdg = p1a->pdg + i * 3;
+ double x = pdg[0] * f3[0] + pdg[1] * f3[1] + pdg[2] * f3[2];
+ z *= x;
+ g1[0] += pdg[0] * f3[0] / x;
+ g1[1] += pdg[1] * f3[1] / x;
+ g1[2] += pdg[2] * f3[2] / x;
+ }
+ pd += p1a->phi[k] * z;
+ for (i = 0; i < 3; ++i)
+ g2[i] += p1a->phi[k] * z * g1[i];
+ }
+ for (i = 0; i < 3; ++i) g[i] = g2[i] / pd;
+ return pd;
+}
+
int bcf_p1_cal(bcf1_t *b, bcf_p1aux_t *ma, bcf_p1rst_t *rst)
{
int i, k;
flast = rst->f_em;
}
}
+ p1_cal_g3(ma, rst->g);
return 0;
}
typedef struct {
int rank0;
- double f_em, f_exp, f_flat, p_ref;
+ double f_em, f_exp, f_flat, p_ref, g[3];
} bcf_p1rst_t;
#define MC_PTYPE_FULL 1
kputc('\0', &s); kputc('\0', &s);
kputs(b->info, &s);
if (b->info[0]) kputc(';', &s);
- ksprintf(&s, "AF1=%.3lf;AFE=%.3lf", 1.-pr->f_em, 1.-pr->f_exp);
+ ksprintf(&s, "AF1=%.3lf;AFE=%.3lf;HWE=%.3lf,%.3lf,%.3lf", 1.-pr->f_em, 1.-pr->f_exp, pr->g[0], pr->g[1], pr->g[2]);
kputc('\0', &s);
kputs(b->fmt, &s); kputc('\0', &s);
free(b->str);