double theta;
double *q2p, *pdg; // pdg -> P(D|g)
double *alpha, *beta;
- int *qsum, *bcnt, rcnt[4];
+ int *qsum, *bcnt;
};
void mc_init_prior(mc_aux_t *ma, double theta)
int i, j;
memset(ma->qsum, 0, sizeof(int) * 4 * ma->n);
memset(ma->bcnt, 0, sizeof(int) * 4 * ma->n);
- memset(ma->rcnt, 0, sizeof(int) * 4);
for (j = 0; j < ma->n; ++j) {
int tmp, *qsum = ma->qsum + j * 4;
int *bcnt = ma->bcnt + j * 4;
- double r = drand48(), rr;
for (i = tmp = 0; i < n[j]; ++i) {
const bam_pileup1_t *p = plp[j] + i;
int q, b;
++bcnt[b];
++tmp;
}
- if (tmp) {
- for (i = 0, rr = 0.; i < 4; ++i) {
- rr += (double)bcnt[i] / tmp;
- if (rr >= r) break;
- }
- if (i < 4) ++ma->rcnt[i];
- }
}
}
// return the reference allele frequency
double mc_freq0(int ref, int *n, const bam_pileup1_t **plp, mc_aux_t *ma, int *_ref, int *alt)
{
- int i, acnt[4], j;
- double fr = -1., f0 = -1.;
+ int i, cnt;
+ double f;
sum_err(n, plp, ma);
set_allele(ref, ma);
cal_pdg(ma);
- acnt[0] = acnt[1] = acnt[2] = acnt[3] = 0;
- for (i = 0; i < ma->n; ++i)
- for (j = 0; j < 4; ++j)
- acnt[j] += ma->bcnt[i * 4 + j];
- if (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]) // never used...
- fr = (double)ma->rcnt[ref] / (ma->rcnt[ma->ref] + ma->rcnt[ma->alt]);
- if (acnt[ma->ref] + acnt[ma->alt])
- f0 = (double)acnt[ref] / (acnt[ma->ref] + acnt[ma->alt]);
*_ref = ma->ref; *alt = ma->alt;
- return ma->n == 1 || fr < 0.? f0 : fr;
+ for (i = cnt = 0, f = 0.; i < ma->n; ++i) {
+ int *bcnt = ma->bcnt + i * 4;
+ int x = bcnt[ma->ref] + bcnt[ma->alt];
+ if (x) {
+ ++cnt;
+ f += (double)bcnt[ma->ref] / x;
+ }
+ }
+ return f / cnt;
}
// f0 is the reference allele frequency
double mc_freq_iter(double f0, mc_aux_t *ma)
f = f0 = mc_freq0(_ref0, n_plp, plp, ma, &_ref, &_alt);
if (f >= 0.0) {
double q, flast = f;
- for (j = 0; j < 10; ++j){
+ for (j = 0; j < 10; ++j) {
f = mc_freq_iter(flast, ma);
+// printf("%lg->%lg\n", flast, f);
if (fabs(f - flast) < 1e-3) break;
flast = f;
}
+ if (1. - f < 1e-4) f = 1.;
pref = mc_ref_prob(ma);
is_var = (pref < .5);
q = is_var? pref : 1. - pref;
}
}
if (argc == 1) {
- fprintf(stderr, "Usage: samtools mpileup [-r reg] [-f in.fa] [-l pos] in1.bam [in2.bam [...]]\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n");
+ fprintf(stderr, "Options: -f FILE reference sequence file [null]\n");
+ fprintf(stderr, " -r STR region in which pileup is generated [null]\n");
+ fprintf(stderr, " -l FILE list of positions (format: chr pos) [null]\n");
+ fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq);
+ fprintf(stderr, " -q INT filter out alignment with MQ smaller than INT [%d]\n", mplp.min_mq);
+ fprintf(stderr, " -V generate VCF output (SNP calling)\n");
+ fprintf(stderr, " -v show variant sites only\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Notes: Assuming error independency and diploid individuals.\n\n");
return 1;
}
mpileup(&mplp, argc - optind, argv + optind);