* mpileup: apply homopolymer correction when calculating GL, instead of before
* bcftools: apply a different prior to indels
memset(r, 0, sizeof(bcf_callret1_t));
for (i = n = 0; i < _n; ++i) {
const bam_pileup1_t *p = pl + i;
memset(r, 0, sizeof(bcf_callret1_t));
for (i = n = 0; i < _n; ++i) {
const bam_pileup1_t *p = pl + i;
- int q, b, mapQ, baseQ, is_diff, min_dist;
+ int q, b, mapQ, baseQ, is_diff, min_dist, seqQ;
// set base
if (p->is_del || p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue;
baseQ = q = is_indel? p->aux&0xff : (int)bam1_qual(p->b)[p->qpos]; // base/indel quality
// set base
if (p->is_del || p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue;
baseQ = q = is_indel? p->aux&0xff : (int)bam1_qual(p->b)[p->qpos]; // base/indel quality
+ seqQ = is_indel? (p->aux>>8&0xff) : 99;
if (q < bca->min_baseQ) continue;
if (q < bca->min_baseQ) continue;
+ if (q > seqQ) q = seqQ;
mapQ = p->b->core.qual < bca->capQ? p->b->core.qual : bca->capQ;
if (q > mapQ) q = mapQ;
if (q > 63) q = 63;
mapQ = p->b->core.qual < bca->capQ? p->b->core.qual : bca->capQ;
if (q > mapQ) q = mapQ;
if (q > 63) q = 63;
b = bam_nt16_nt4_table[b? b : ref_base]; // b is the 2-bit base
is_diff = (ref4 < 4 && b == ref4)? 0 : 1;
} else {
b = bam_nt16_nt4_table[b? b : ref_base]; // b is the 2-bit base
is_diff = (ref4 < 4 && b == ref4)? 0 : 1;
} else {
is_diff = (b != 0);
}
bca->bases[n++] = q<<5 | (int)bam1_strand(p->b)<<4 | b;
is_diff = (b != 0);
}
bca->bases[n++] = q<<5 | (int)bam1_strand(p->b)<<4 | b;
for (s = K = 0; s < n; ++s) {
for (i = 0; i < n_plp[s]; ++i, ++K) {
bam_pileup1_t *p = plp[s] + i;
for (s = K = 0; s < n; ++s) {
for (i = 0; i < n_plp[s]; ++i, ++K) {
bam_pileup1_t *p = plp[s] + i;
- int *sct = &score[K*n_types], indelQ;
+ int *sct = &score[K*n_types], indelQ, seqQ;
for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
for (t = 1; t < n_types; ++t) // insertion sort
for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
for (t = 1; t < n_types; ++t) // insertion sort
for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
*/
if ((sc[0]&0x3f) == ref_type) {
indelQ = (sc[1]>>6) - (sc[0]>>6);
*/
if ((sc[0]&0x3f) == ref_type) {
indelQ = (sc[1]>>6) - (sc[0]>>6);
- tmp = est_seqQ(bca, types[sc[1]&0x3f], l_run);
+ seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run);
} else {
for (t = 0; t < n_types; ++t) // look for the reference type
if ((sc[t]&0x3f) == ref_type) break;
indelQ = (sc[t]>>6) - (sc[0]>>6);
} else {
for (t = 0; t < n_types; ++t) // look for the reference type
if ((sc[t]&0x3f) == ref_type) break;
indelQ = (sc[t]>>6) - (sc[0]>>6);
- tmp = est_seqQ(bca, types[sc[0]&0x3f], l_run);
+ seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run);
- if (indelQ > tmp) indelQ = tmp;
- p->aux = (sc[0]&0x3f)<<8 | indelQ;
- sumq[sc[0]&0x3f] += indelQ;
+ p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ;
+ sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
// fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d q=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ);
}
}
// fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d q=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ);
}
}
for (s = K = 0; s < n; ++s) {
for (i = 0; i < n_plp[s]; ++i, ++K) {
bam_pileup1_t *p = plp[s] + i;
for (s = K = 0; s < n; ++s) {
for (i = 0; i < n_plp[s]; ++i, ++K) {
bam_pileup1_t *p = plp[s] + i;
- int x = types[p->aux>>8&0x3f];
+ int x = types[p->aux>>16&0x3f];
for (j = 0; j < 4; ++j)
if (x == bca->indel_types[j]) break;
for (j = 0; j < 4; ++j)
if (x == bca->indel_types[j]) break;
- p->aux = j<<8 | (j == 4? 0 : (p->aux&0xff));
-// fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d q=%d\n", pos, s, i, bam1_qname(p->b), p->aux>>8&63, p->aux&0xff);
+ p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
+// fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d q=%d\n", pos, s, i, bam1_qname(p->b), p->aux>>16&63, p->aux&0xff);
#endif
#ifndef PACKAGE_VERSION
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.9-5 (r802)"
+#define PACKAGE_VERSION "0.1.9-6 (r803)"
#endif
int bam_taf2baf(int argc, char *argv[]);
#endif
int bam_taf2baf(int argc, char *argv[]);
memcpy(r->gi[i].data, b->gi[i].data, r->n_smpl * r->gi[i].len);
return 0;
}
memcpy(r->gi[i].data, b->gi[i].data, r->n_smpl * r->gi[i].len);
return 0;
}
+
+int bcf_is_indel(const bcf1_t *b)
+{
+ char *p;
+ if (strlen(b->ref) > 1) return 1;
+ for (p = b->alt; *p; ++p)
+ if (*p != ',' && p[1] != ',' && p[1] != '\0')
+ return 1;
+ return 0;
+}
int bcf_shrink_alt(bcf1_t *b, int n);
// convert GL to PL
int bcf_gl2pl(bcf1_t *b);
int bcf_shrink_alt(bcf1_t *b, int n);
// convert GL to PL
int bcf_gl2pl(bcf1_t *b);
+ // if the site is an indel
+ int bcf_is_indel(const bcf1_t *b);
// string hash table
void *bcf_build_refhash(bcf_hdr_t *h);
// string hash table
void *bcf_build_refhash(bcf_hdr_t *h);
#include "kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
#include "kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
-#define MC_AVG_ERR 0.007
#define MC_MAX_EM_ITER 16
#define MC_EM_EPS 1e-4
#define MC_MAX_EM_ITER 16
#define MC_EM_EPS 1e-4
+#define MC_DEF_INDEL 0.15
unsigned char seq_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
unsigned char seq_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
struct __bcf_p1aux_t {
};
struct __bcf_p1aux_t {
+ int n, M, n1, is_indel;
double *q2p, *pdg; // pdg -> P(D|g)
double *q2p, *pdg; // pdg -> P(D|g)
+ double *phi, *phi_indel;
double *z, *zswap; // aux for afs
double *z1, *z2, *phi1, *phi2; // only calculated when n1 is set
double t, t1, t2;
double *z, *zswap; // aux for afs
double *z1, *z2, *phi1, *phi2; // only calculated when n1 is set
double t, t1, t2;
+void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x)
+{
+ int i;
+ for (i = 0; i < ma->M; ++i)
+ ma->phi_indel[i] = ma->phi[i] * x;
+ ma->phi_indel[ma->M] = 1. - ma->phi[ma->M] * x;
+}
+
static void init_prior(int type, double theta, int M, double *phi)
{
int i;
static void init_prior(int type, double theta, int M, double *phi)
{
int i;
void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta)
{
init_prior(type, theta, ma->M, ma->phi);
void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta)
{
init_prior(type, theta, ma->M, ma->phi);
+ bcf_p1_indel_prior(ma, MC_DEF_INDEL);
}
void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta)
}
void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta)
fprintf(stderr, "[%s] heterozygosity=%lf, ", __func__, (double)sum);
for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k] / ma->M;
fprintf(stderr, "theta=%lf\n", (double)sum);
fprintf(stderr, "[%s] heterozygosity=%lf, ", __func__, (double)sum);
for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k] / ma->M;
fprintf(stderr, "theta=%lf\n", (double)sum);
+ bcf_p1_indel_prior(ma, MC_DEF_INDEL);
ma->q2p = calloc(256, sizeof(double));
ma->pdg = calloc(3 * ma->n, sizeof(double));
ma->phi = calloc(ma->M + 1, sizeof(double));
ma->q2p = calloc(256, sizeof(double));
ma->pdg = calloc(3 * ma->n, sizeof(double));
ma->phi = calloc(ma->M + 1, sizeof(double));
+ ma->phi_indel = calloc(ma->M + 1, sizeof(double));
ma->phi1 = calloc(ma->M + 1, sizeof(double));
ma->phi2 = calloc(ma->M + 1, sizeof(double));
ma->z = calloc(2 * ma->n + 1, sizeof(double));
ma->phi1 = calloc(ma->M + 1, sizeof(double));
ma->phi2 = calloc(ma->M + 1, sizeof(double));
ma->z = calloc(2 * ma->n + 1, sizeof(double));
{
if (ma) {
free(ma->q2p); free(ma->pdg);
{
if (ma) {
free(ma->q2p); free(ma->pdg);
- free(ma->phi); free(ma->phi1); free(ma->phi2);
+ free(ma->phi); free(ma->phi_indel); free(ma->phi1); free(ma->phi2);
free(ma->z); free(ma->zswap); free(ma->z1); free(ma->z2);
free(ma->afs); free(ma->afs1);
free(ma);
free(ma->z); free(ma->zswap); free(ma->z1); free(ma->z2);
free(ma->afs); free(ma->afs1);
free(ma);
{
int k;
long double sum = 0.;
{
int k;
long double sum = 0.;
+ double *phi = ma->is_indel? ma->phi_indel : ma->phi;
memset(ma->afs1, 0, sizeof(double) * (ma->M + 1));
mc_cal_y(ma);
for (k = 0, sum = 0.; k <= ma->M; ++k)
memset(ma->afs1, 0, sizeof(double) * (ma->M + 1));
mc_cal_y(ma);
for (k = 0, sum = 0.; k <= ma->M; ++k)
- sum += (long double)ma->phi[k] * ma->z[k];
+ sum += (long double)phi[k] * ma->z[k];
for (k = 0; k <= ma->M; ++k) {
for (k = 0; k <= ma->M; ++k) {
- ma->afs1[k] = ma->phi[k] * ma->z[k] / sum;
+ ma->afs1[k] = phi[k] * ma->z[k] / sum;
if (isnan(ma->afs1[k]) || isinf(ma->afs1[k])) return -1.;
}
for (k = 0, sum = 0.; k <= ma->M; ++k) {
if (isnan(ma->afs1[k]) || isinf(ma->afs1[k])) return -1.;
}
for (k = 0, sum = 0.; k <= ma->M; ++k) {
{
int i, k;
long double sum = 0.;
{
int i, k;
long double sum = 0.;
+ ma->is_indel = bcf_is_indel(b);
// set PL and PL_len
for (i = 0; i < b->n_gi; ++i) {
if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
// set PL and PL_len
for (i = 0; i < b->n_gi; ++i) {
if (b->gi[i].fmt == bcf_str2int("PL", 2)) {