bca->indelreg = 0;
for (t = 0; t < n_types; ++t) {
int l, ir;
- ka_param2_t ap = ka_param2_qual;
- ap.band_width = abs(types[t]) + 3;
+ kpa_par_t ap = { 1e-4, 1e-2, 10 };
+ ap.bw = abs(types[t]) + 3;
// compute indelreg
if (types[t] == 0) ir = 0;
else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
bam_pileup1_t *p = plp[s] + i;
int qbeg, qend, tbeg, tend, sc;
uint8_t *seq = bam1_seq(p->b);
+ // FIXME: the following skips soft clips, but using them may be more sensitive.
// determine the start and end of sequences for alignment
qbeg = tpos2qpos(&p->b->core, bam1_cigar(p->b), left, 0, &tbeg);
qend = tpos2qpos(&p->b->core, bam1_cigar(p->b), right, 1, &tend);
// write the query sequence
for (l = qbeg; l < qend; ++l)
query[l - qbeg] = bam_nt16_nt4_table[bam1_seqi(seq, l)];
- // do alignment; this takes most of computing time for indel calling
- if (0) {
- uint8_t *qq = calloc(qend - qbeg, 1);
- for (l = 0; l < qend - qbeg; ++l) qq[l] = 23;
+ { // do alignment; this is the bottleneck
+ const uint8_t *qual = bam1_qual(p->b), *bq;
+ uint8_t *qq = 0;
+ qq = calloc(qend - qbeg, 1);
+ bq = (uint8_t*)bam_aux_get(p->b, "BQ");
+ if (bq) ++bq;
+ for (l = qbeg; l < qend; ++l)
+ qq[l - qbeg] = bq? qual[l] + (bq[l] - 33) : qual[l];
sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
- (uint8_t*)query, qend - qbeg, qq, &kpa_par_alt, 0, 0);
+ (uint8_t*)query, qend - qbeg, qq, &ap, 0, 0);
score[K*n_types + t] = sc;
- } else {
- sc = ka_global_score((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
- (uint8_t*)query, qend - qbeg, &ap);
- score[K*n_types + t] = -sc;
+ free(qq);
}
/*
for (l = 0; l < tend - tbeg + abs(types[t]); ++l)
return (int)(t + .499);
}
-int bam_prob_realn(bam1_t *b, const char *ref)
+int bam_prob_realn_core(bam1_t *b, const char *ref, int write_bq)
{
int k, i, bw, x, y, yb, ye, xb, xe;
uint32_t *cigar = bam1_cigar(b);
if (xe - xb - c->l_qseq > bw)
xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2;
{ // glocal
- uint8_t *s, *r, *q, *seq = bam1_seq(b), *qual = bam1_qual(b);
+ uint8_t *s, *r, *q, *seq = bam1_seq(b), *qual = bam1_qual(b), *bq = 0;
int *state;
+ if (write_bq) {
+ bq = calloc(c->l_qseq + 1, 1);
+ memcpy(bq, qual, c->l_qseq);
+ }
s = calloc(c->l_qseq, 1);
for (i = 0; i < c->l_qseq; ++i) s[i] = bam_nt16_nt4_table[bam1_seqi(seq, i)];
r = calloc(xe - xb, 1);
} else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
else if (op == BAM_CDEL) x += l;
}
+ if (write_bq) {
+ for (i = 0; i < c->l_qseq; ++i) bq[i] = bq[i] - qual[i] + 33;
+ bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq);
+ free(bq);
+ }
free(s); free(r); free(q); free(state);
}
return 0;
}
+int bam_prob_realn(bam1_t *b, const char *ref)
+{
+ return bam_prob_realn_core(b, ref, 0);
+}
+
int bam_fillmd(int argc, char *argv[])
{
int c, is_equal = 0, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed, max_nm = 0, is_realn, capQ = 0;
fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
fp->header->target_name[tid]);
}
- if (is_realn) bam_prob_realn(b, ref);
+ if (is_realn) bam_prob_realn_core(b, ref, 0);
if (capQ > 10) {
int q = bam_cap_mapQ(b, ref, capQ);
if (b->core.qual > q) b->core.qual = q;
static int mplp_func(void *data, bam1_t *b)
{
extern int bam_realn(bam1_t *b, const char *ref);
- extern int bam_prob_realn(bam1_t *b, const char *ref);
+ extern int bam_prob_realn_core(bam1_t *b, const char *ref, int);
extern int bam_cap_mapQ(bam1_t *b, char *ref, int thres);
mplp_aux_t *ma = (mplp_aux_t*)data;
int ret, skip = 0;
ret = ma->iter? bam_iter_read(ma->fp, ma->iter, b) : bam_read1(ma->fp, b);
if (ret < 0) break;
skip = 0;
- if (has_ref && (ma->flag&MPLP_REALN)) bam_prob_realn(b, ma->ref);
+ if (has_ref && (ma->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, 1);
if (has_ref && ma->capQ_thres > 10) {
int q = bam_cap_mapQ(b, ma->ref, ma->capQ_thres);
if (q < 0) skip = 1;
#endif
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.9-8 (r809)"
+#define PACKAGE_VERSION "0.1.9-9 (r810)"
#endif
int bam_taf2baf(int argc, char *argv[]);