* allele frequency calculation apparently works...
#include <stdlib.h>
#include "bam_mcns.h"
#include <stdlib.h>
#include "bam_mcns.h"
#define MC_MAX_SUMQ 3000
#define MC_MAX_SUMQP 1e-300
#define MC_MAX_SUMQ 3000
#define MC_MAX_SUMQP 1e-300
int n, N;
int ref, alt;
double *q2p, *pdg; // pdg -> P(D|g)
int n, N;
int ref, alt;
double *q2p, *pdg; // pdg -> P(D|g)
+ int *qsum, *bcnt, rcnt[4];
};
mc_aux_t *mc_init(int n) // FIXME: assuming diploid
};
mc_aux_t *mc_init(int n) // FIXME: assuming diploid
int i, j;
memset(ma->qsum, 0, sizeof(int) * 4 * ma->n);
memset(ma->bcnt, 0, sizeof(int) * 4 * ma->n);
int i, j;
memset(ma->qsum, 0, sizeof(int) * 4 * ma->n);
memset(ma->bcnt, 0, sizeof(int) * 4 * ma->n);
+ memset(ma->rcnt, 0, sizeof(int) * 4);
for (j = 0; j < ma->n; ++j) {
for (j = 0; j < ma->n; ++j) {
- int *qsum = ma->qsum + j * 4;
+ int tmp, *qsum = ma->qsum + j * 4;
int *bcnt = ma->bcnt + j * 4;
int *bcnt = ma->bcnt + j * 4;
- for (i = 0; i < n[j]; ++i) {
+ double r = drand48(), rr;
+ for (i = tmp = 0; i < n[j]; ++i) {
const bam_pileup1_t *p = plp[j] + i;
int q, b;
if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue;
const bam_pileup1_t *p = plp[j] + i;
int q, b;
if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue;
if (b > 3) continue; // N
qsum[b] += q;
++bcnt[b];
if (b > 3) continue; // N
qsum[b] += q;
++bcnt[b];
+ ++tmp;
+ }
+ if (tmp) {
+ for (i = 0, rr = 0.; i < 4; ++i) {
+ rr += (double)bcnt[i] / tmp;
+ if (rr >= r) break;
+ }
+ if (i < 4) ++ma->rcnt[i];
qsum = ma->qsum + j * 4;
bcnt = ma->bcnt + j * 4;
pi[1] = 3 * (bcnt[ma->ref] + bcnt[ma->alt]);
qsum = ma->qsum + j * 4;
bcnt = ma->bcnt + j * 4;
pi[1] = 3 * (bcnt[ma->ref] + bcnt[ma->alt]);
- pi[0] = qsum[ma->alt];
- pi[2] = qsum[ma->ref];
+ pi[0] = qsum[ma->ref];
+ pi[2] = qsum[ma->alt];
- pdg[i] = pi[i] < MC_MAX_SUMQ? MC_MAX_SUMQP : ma->q2p[pi[i]];
+ pdg[i] = pi[i] > MC_MAX_SUMQ? MC_MAX_SUMQP : ma->q2p[pi[i]];
}
}
double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ref, int *alt)
{
int i, acnt[4], j;
}
}
double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ref, int *alt)
{
int i, acnt[4], j;
+ double fr = -1., f0 = -1.;
sum_err(n, plp, ma);
set_allele(ref, ma);
cal_pdg(ma);
sum_err(n, plp, ma);
set_allele(ref, ma);
cal_pdg(ma);
for (i = 0; i < ma->n; ++i)
for (j = 0; j < 4; ++j)
acnt[j] += ma->bcnt[i * 4 + j];
for (i = 0; i < ma->n; ++i)
for (j = 0; j < 4; ++j)
acnt[j] += ma->bcnt[i * 4 + j];
- f0 = acnt[ma->ref] + acnt[ma->alt] == 0? -1.
- : (double)acnt[ref] / (acnt[ma->ref] + acnt[ma->alt]);
+ if (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]) // never used...
+ fr = (double)ma->rcnt[ref] / (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]);
+ if (acnt[ma->ref] + acnt[ma->alt])
+ f0 = (double)acnt[ref] / (acnt[ma->ref] + acnt[ma->alt]);
*_ref = ma->ref; *alt = ma->alt;
*_ref = ma->ref; *alt = ma->alt;
+ return ma->n == 1 || fr < 0.? f0 : fr;
}
double mc_freq_iter(double f0, mc_aux_t *ma)
{
double f, f3[3];
int i, j;
}
double mc_freq_iter(double f0, mc_aux_t *ma)
{
double f, f3[3];
int i, j;
- f3[0] = f0*f0; f3[1] = 2.*f0*(1.-f0); f3[2] = (1.-f0)*(1.-f0);
+ f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0;
for (i = 0, f = 0.; i < ma->n; ++i) {
double up, dn, *pdg;
pdg = ma->pdg + i * 3;
for (i = 0, f = 0.; i < ma->n; ++i) {
double up, dn, *pdg;
pdg = ma->pdg + i * 3;
dn += pdg[j] * f3[j];
f += up / dn;
}
dn += pdg[j] * f3[j];
f += up / dn;
}
ref_tid = tid;
}
if (conf->vcf) {
ref_tid = tid;
}
if (conf->vcf) {
- double f;
- int j, _ref, _alt, _ref0, depth, rms_q;
+ double f0, f; // the reference allele frequency
+ int j, _ref, _alt, _ref0, depth, rms_q, _ref0b;
- _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
+ _ref0 = _ref0b = (ref && pos < ref_len)? ref[pos] : 'N';
_ref0 = bam_nt16_nt4_table[bam_nt16_table[_ref0]];
_ref0 = bam_nt16_nt4_table[bam_nt16_table[_ref0]];
- f = mc_freq0(_ref0, n_plp, plp, ma, &_ref, &_alt);
- printf("%s\t%d\t.\t%c\t", h->target_name[tid], pos + 1, "ACGTN"[_ref0]);
+ f = f0 = mc_freq0(_ref0, n_plp, plp, ma, &_ref, &_alt);
+ if (f >= 0.0) {
+ double flast = f;
+ for (j = 0; j < 10; ++j) {
+ f = mc_freq_iter(flast, ma);
+ if (fabs(f - flast) < 1e-3) break;
+ flast = f;
+ }
+ }
+ printf("%s\t%d\t.\t%c\t", h->target_name[tid], pos + 1, _ref0b);
if (_ref0 == _ref) putchar("ACGTN"[_alt]);
else printf("%c,%c", "ACGTN"[_ref], "ACGTN"[_alt]);
if (_ref0 == _ref) putchar("ACGTN"[_alt]);
else printf("%c,%c", "ACGTN"[_ref], "ACGTN"[_alt]);
- printf("\t0\tPASS\t"); // FIXME: currently these not available
+ printf("\t0\t"); // FIXME: currently these not available
+ if (f0 < 0.) printf("Q13\t");
+ else printf(".\t");
for (i = depth = 0, sqr_sum = 0; i < n; ++i) {
depth += n_plp[i];
for (j = 0; j < n_plp[i]; ++j) {
for (i = depth = 0, sqr_sum = 0; i < n; ++i) {
depth += n_plp[i];
for (j = 0; j < n_plp[i]; ++j) {
}
}
rms_q = (int)(sqrt((double)sqr_sum / depth) + .499);
}
}
rms_q = (int)(sqrt((double)sqr_sum / depth) + .499);
- printf("DP=%d;MQ=%d;AF=%.3lg", depth, rms_q, 1.-f); // FIXME: not working for triallelic alleles
+ printf("DP=%d;MQ=%d;AF=%.3lg", depth, rms_q, f0<0.?0.:1.-f);
printf("\tDP");
// output genotype information; FIXME: to be implmented...
for (i = 0; i < n; ++i)
printf("\tDP");
// output genotype information; FIXME: to be implmented...
for (i = 0; i < n; ++i)
#endif
#ifndef PACKAGE_VERSION
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.8-1 (r615)"
+#define PACKAGE_VERSION "0.1.8-2 (r617)"
#endif
int bam_taf2baf(int argc, char *argv[]);
#endif
int bam_taf2baf(int argc, char *argv[]);