#include "ksort.h"
KSORT_INIT_GENERIC(uint32_t)
+#define MAX_WINDOW 33
+
typedef struct __bmc_aux_t {
int max;
uint32_t *info;
// Per-site scratch accumulator: esum[]/fsum[] hold one sum per nt4 base and c[] one read
// count per nt4 base (c[k&3] is incremented in the esum/fsum loop).
typedef struct {
float esum[4], fsum[4];
uint32_t c[4];
- uint32_t mapQ_max;
+ uint32_t rms_mapQ; // root-mean-square of the capped mapping qualities; replaces the former maximum
} glf_call_aux_t;
// 16-entry lookup from a 4-bit base code to a 2-bit one: indices 1, 2, 4 and 8 map to 0..3,
// every other index maps to 4 (callers test "< 4" to detect a usable base).
char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
bm->theta = 0.85;
bm->n_hap = 2;
bm->eta = 0.03;
+ bm->cap_mapQ = 60;
return bm;
}
int i, j, k, w[8], c, n;
glf1_t *g = (glf1_t*)calloc(1, sizeof(glf1_t));
float p[16], min_p = 1e30;
+ uint64_t rms;
g->ref_base = ref_base;
if (_n == 0) return g;
}
// Pack one 32-bit word per usable read into bm->aux->info[]. Deleted and unmapped reads are
// skipped, so n (words written) can be smaller than _n -- and can be 0.
// Word layout as assembled below:
//   bits 24+   : min(base quality, mapping quality)
//   bit  21    : set when the base resolves to one of the four nt4 codes
//   bit  18    : strand
//   bits 16-17 : nt4 base code
//   bits 8+    : raw base quality
//   low bits   : mapping quality (p->b->core.qual)
for (i = n = 0; i < _n; ++i) {
const bam_pileup1_t *p = pl + i; // NOTE(review): shadows the float p[16] declared earlier in this function (inner scope only)
- uint32_t q, x = 0;
+ uint32_t q, x = 0, qq;
if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue;
q = (uint32_t)bam1_qual(p->b)[p->qpos];
x |= (uint32_t)bam1_strand(p->b) << 18 | q << 8 | p->b->core.qual;
if (p->b->core.qual < q) q = p->b->core.qual;
x |= q << 24;
- q = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
+ qq = bam1_seqi(bam1_seq(p->b), p->qpos);
+ q = bam_nt16_nt4_table[qq? qq : ref_base]; // a zero read code falls back to ref_base (presumably "same as reference" -- TODO confirm); ref_base must be < 16 to stay inside the table
// NOTE(review): !p->is_del is always true here because deleted reads were skipped at the top of the loop
if (!p->is_del && q < 4) x |= 1 << 21 | q << 16;
bm->aux->info[n++] = x;
}
// generate esum and fsum
// This change also replaces the running maximum mapping quality with a root-mean-square:
// each read's mapping quality is capped at bm->cap_mapQ, squared and summed into rms.
b = (glf_call_aux_t*)calloc(1, sizeof(glf_call_aux_t));
for (k = 0; k != 8; ++k) w[k] = 0;
- b->mapQ_max = 0;
+ rms = 0; // sum of squared, capped mapping qualities
for (j = n - 1; j >= 0; --j) { // calculate esum and fsum
uint32_t info = bm->aux->info[j];
+ int tmp;
if (info>>24 < 4 && (info>>8&0x3f) != 0) info = 4<<24 | (info&0xffffff);
k = info>>16&7;
if (info>>24 > 0) {
if (w[k] < 0xff) ++w[k];
++b->c[k&3];
}
- if (b->mapQ_max < (info&0x7f)) b->mapQ_max = info&0x7f;
+ tmp = (int)(info&0x7f) < bm->cap_mapQ? (int)(info&0x7f) : bm->cap_mapQ; // min(mapping quality, cap_mapQ)
+ rms += tmp * tmp;
}
// NOTE(review): n counts only the reads kept by the packing loop. If every read was skipped
// (all deleted or unmapped) n is 0 here, rms / n evaluates to 0.0/0 (NaN), and converting NaN
// to uint8_t is undefined behaviour -- confirm an n == 0 guard exists in the elided context.
+ b->rms_mapQ = (uint8_t)(sqrt((double)rms / n) + .499); // rounded RMS, narrowed to 8 bits before being stored
// rescale ->c[]
for (j = c = 0; j != 4; ++j) c += b->c[j];
if (c > 255) {
if (p[j<<2|k] < 0.0) p[j<<2|k] = 0.0;
}
// Post-adjust the diagonal entries p[k<<2|k]: if one base has a strictly
// larger esum than all others, make sure its diagonal entry is the smallest one.
+ { // fix p[k<<2|k]
+ float max1, max2, min1, min2;
+ int max_k, min_k;
+ max_k = min_k = -1;
+ max1 = max2 = -1.0; min1 = min2 = 1e30;
+ for (k = 0; k < 4; ++k) { // largest (max1, at max_k) and second-largest (max2) esum
+ if (b->esum[k] > max1) {
+ max2 = max1; max1 = b->esum[k]; max_k = k;
+ } else if (b->esum[k] > max2) max2 = b->esum[k];
+ }
+ for (k = 0; k < 4; ++k) { // smallest (min1, at min_k) and second-smallest (min2) diagonal entry
+ if (p[k<<2|k] < min1) {
+ min2 = min1; min1 = p[k<<2|k]; min_k = k;
+ } else if (p[k<<2|k] < min2) min2 = p[k<<2|k];
+ }
// max1 > max2 requires a unique top esum; it is also false while max_k is still -1, so the
// index below is never negative. The override fires when the top base is not already the
// diagonal minimum, or is the minimum by a margin of less than 1.0.
+ if (max1 > max2 && (min_k != max_k || min1 + 1.0 > min2))
+ p[max_k<<2|max_k] = min1 > 1.0? min1 - 1.0 : 0.0; // 1.0 below the current minimum, floored at 0
+ }
+
// convert necessary information to glf1_t
- g->ref_base = ref_base; g->max_mapQ = b->mapQ_max;
+ g->ref_base = ref_base; g->max_mapQ = b->rms_mapQ; // NOTE(review): the field keeps the name max_mapQ but now carries the RMS mapping quality
g->depth = n > 16777215? 16777215 : n;
for (j = 0; j != 4; ++j)
for (k = j; k < 4; ++k)
#define MINUS_CONST 0x10000000
// Evaluate candidate indels at position pos from the n pileup entries in pl.
// _n_types/_types (new in this change) let the caller propose indel lengths to test in addition
// to those observed in the reads. When _n_types is 0 the function returns 0 unless at least one
// mapped read carries a non-zero indel.
-bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, const bam_pileup1_t *pl, const char *ref)
+bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, const bam_pileup1_t *pl, const char *ref,
+ int _n_types, int *_types)
{
int i, j, n_types, *types, left, right;
bam_maqindel_ret_t *ret = 0;
- for (i = 0; i < n; ++i) {
- const bam_pileup1_t *p = pl + i;
- if (!(p->b->core.flag&BAM_FUNMAP) && p->indel != 0) break;
+ // if there is no proposed indel, check if there is an indel from the alignment
+ if (_n_types == 0) {
+ for (i = 0; i < n; ++i) {
+ const bam_pileup1_t *p = pl + i;
+ if (!(p->b->core.flag&BAM_FUNMAP) && p->indel != 0) break;
+ }
+ if (i == n) return 0; // no indel
}
- if (i == n) return 0; // no indel
{ // calculate how many types of indels are available (set n_types and types)
int m;
uint32_t *aux;
- aux = (uint32_t*)calloc(n+1, 4);
+ aux = (uint32_t*)calloc(n + _n_types + 1, 4); // room for the zero type, one entry per read and every proposed type; NOTE(review): result is not checked for NULL
m = 0;
aux[m++] = MINUS_CONST; // zero indel is always a type
for (i = 0; i < n; ++i) {
if (!(p->b->core.flag&BAM_FUNMAP) && p->indel != 0)
aux[m++] = MINUS_CONST + p->indel;
}
+ if (_n_types) // then also add this to aux[]
+ for (i = 0; i < _n_types; ++i)
+ if (_types[i]) aux[m++] = MINUS_CONST + _types[i]; // zero is skipped: it was already added above
ks_introsort(uint32_t, m, aux);
- n_types = 1;
- for (i = 1; i < m; ++i)
+ // squeeze out identical types
+ for (i = 1, n_types = 1; i < m; ++i)
if (aux[i] != aux[i-1]) ++n_types;
types = (int*)calloc(n_types, sizeof(int));
j = 0;
if (seg.tend > right) right = seg.tend;
}
}
// Clamp the window so it extends at most MAX_WINDOW positions from pos on either side.
+ if (pos - left > MAX_WINDOW) left = pos - MAX_WINDOW;
+ if (right - pos> MAX_WINDOW) right = pos + MAX_WINDOW;
}
// Fill score[i*n+j] and pscore[i*n+j] for candidate type i and pileup entry j:
// s gains 1 per matching base and loses mi->mm_penalty per mismatch, ps sums the base
// qualities at mismatching positions. A reference code of 15 marks "outside the window" and
// such positions are ignored.
{ // the core part
char *ref2, *inscns = 0;
bam_segreg(pos, c, cigar, &seg);
for (ps = s = 0, l = seg.qbeg; c->pos + l < right && l < seg.qend; ++l) {
int cq = bam1_seqi(bam1_seq(p->b), l), ct;
- ct = c->pos + l >= left? ref2[c->pos + l - left] : 15; // "<" should not happen if there is no bug
+ // in the following line, "<" will happen if reads are too long
+ ct = c->pos + l - seg.qbeg >= left? ref2[c->pos + l - seg.qbeg - left] : 15; // NOTE(review): the index now subtracts seg.qbeg but the loop bound above still tests c->pos + l < right -- confirm the two are meant to differ
if (cq < 15 && ct < 15) {
s += cq == ct? 1 : -mi->mm_penalty;
if (cq != ct) ps += bam1_qual(p->b)[l];
if (types[i] != 0) { // then try the other way to calculate the score
for (ps = s = 0, l = seg.qbeg; c->pos + l + types[i] < right && l < seg.qend; ++l) {
int cq = bam1_seqi(bam1_seq(p->b), l), ct;
- ct = c->pos + l + types[i] >= left? ref2[c->pos + l + types[i] - left] : 15;
+ ct = c->pos + l - seg.qbeg + types[i] >= left? ref2[c->pos + l - seg.qbeg + types[i] - left] : 15; // same seg.qbeg shift as above; the loop bound is likewise unshifted
if (cq < 15 && ct < 15) {
s += cq == ct? 1 : -mi->mm_penalty;
if (cq != ct) ps += bam1_qual(p->b)[l];
}
if (score[i*n+j] < s) score[i*n+j] = s; // choose the higher of the two scores
if (pscore[i*n+j] > ps) pscore[i*n+j] = ps;
- if (types[i] != 0) score[i*n+j] -= mi->indel_err;
- //printf("%d, %d, %d, %d\n", i, types[i], j, score[i*n+j]);
+ //if (types[i] != 0) score[i*n+j] -= mi->indel_err;
+ //printf("%d, %d, %d, %d, %d, %d, %d\n", p->b->core.pos + 1, seg.qbeg, i, types[i], j,
+ // score[i*n+j], pscore[i*n+j]);
}
}
{ // get final result
ret->s[1][k+1] = ref[pos + k + 1];
} else ret->s[1][0] = '*';
// write count
// After this change the counts come from each read's own indel length rather than from score
// differences: a read matching ret->indel1 goes to cnt1, one matching ret->indel2 to cnt2,
// anything else to cnt_anti.
// NOTE(review): unlike the scans at the top of the function, this loop does not skip
// BAM_FUNMAP reads -- confirm that is intended.
- for (j = 0; j < n; ++j) {
- if (score[max1_i*n+j] < 0 && score[max2_i*n+j] < 0) ++ret->cnt_anti;
- else {
- int diff = score[max1_i*n+j] - score[max2_i*n+j];
- if (diff > mi->ambi_thres) ++ret->cnt1;
- else if (diff < -mi->ambi_thres) ++ret->cnt2;
- else ++ret->cnt_ambi;
- }
+ for (i = 0; i < n; ++i) {
+ const bam_pileup1_t *p = pl + i;
+ if (p->indel == ret->indel1) ++ret->cnt1;
+ else if (p->indel == ret->indel2) ++ret->cnt2;
+ else ++ret->cnt_anti;
}
// write gl[]
ret->gl[0] = ret->gl[1] = 0;
for (j = 0; j < n; ++j) {
int s1 = pscore[max1_i*n + j], s2 = pscore[max2_i*n + j];
- ret->gl[0] += s1 < s2? 0 : s1 - s2 < mi->q_indel? s1 - s2 : mi->q_indel;
- ret->gl[1] += s2 < s1? 0 : s2 - s1 < mi->q_indel? s2 - s1 : mi->q_indel;
+ //printf("%d, %d, %d, %d, %d\n", pl[j].b->core.pos+1, max1_i, max2_i, s1, s2);
+ if (s1 > s2) ret->gl[0] += s1 - s2 < mi->q_indel? s1 - s2 : mi->q_indel; // per-read difference, capped at mi->q_indel
+ else ret->gl[1] += s2 - s1 < mi->q_indel? s2 - s1 : mi->q_indel; // a tie adds 0 here
+ }
+ // write cnt_ref and cnt_ambi
// Only computed when neither selected type is index 0; score[j] is row 0 of the score matrix.
// NOTE(review): this treats type index 0 as the no-indel type, but aux[] is sorted before
// types[] is built and a deletion (MINUS_CONST + negative length) would sort ahead of
// MINUS_CONST -- confirm types[0] == 0 is guaranteed, otherwise row 0 is not the reference.
+ if (max1_i != 0 && max2_i != 0) {
+ for (j = 0; j < n; ++j) {
+ int diff1 = score[j] - score[max1_i * n + j];
+ int diff2 = score[j] - score[max2_i * n + j];
+ if (diff1 > 0 && diff2 > 0) ++ret->cnt_ref;
+ else if (diff1 == 0 || diff2 == 0) ++ret->cnt_ambi;
+ }
}
}
free(score); free(pscore); free(ref2); free(inscns);