X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam2bcf_indel.c;h=819c1e7b1371296b7da5536939fab691f53c1722;hb=30004e9139b96449fa34831af2fd8e860567a29d;hp=4f747b156f439bf5d6e516cea3856bf5054e831c;hpb=ea1bf85308be7146d11a0153391d99fcfb3f83ad;p=samtools.git diff --git a/bam2bcf_indel.c b/bam2bcf_indel.c index 4f747b1..819c1e7 100644 --- a/bam2bcf_indel.c +++ b/bam2bcf_indel.c @@ -408,153 +408,3 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla free(types); free(inscns); return n_alt > 0? 0 : -1; } - -#define END_SKIP 4 // must be larger than B2B_MAX_MNP -#define MIN_MNP_FLANK_BAQ 20 - -int bcf_call_mnp_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref) -{ - extern void ks_introsort_uint32_t(int, uint32_t*); - int i, s, j, k, t, n_types, *types; - int N, ref_seq, ref_type; - if (ref == 0 || bca == 0) return -1; - if (pos < bca->last_mnp_pos + B2B_MNP_WIN) return -2; // to avoid calling a TNP multiple times - if (pos < END_SKIP || ref[pos] == 0 || ref[pos+1] == 0) return -2; // end of the reference - { // determine if there is an MNP - int r[2]; - uint8_t *tt; - r[0] = bam_nt16_table[(int)ref[pos]]; - r[1] = bam_nt16_table[(int)ref[pos+1]]; - for (s = 0; s < n; ++s) { - for (i = 0; i < n_plp[s]; ++i) { - bam_pileup1_t *p = plp[s] + i; - int left, rght; - if (p->qpos < END_SKIP || p->qpos >= p->b->core.l_qseq - END_SKIP) continue; - tt = bam1_seq(p->b); - if (bam1_seqi(tt, p->qpos) == r[0] || bam1_seqi(tt, p->qpos+1) == r[1]) continue; // no MNP - tt = bam1_qual(p->b); - for (left = 0, k = p->qpos - END_SKIP; k <= p->qpos; ++k) - left = left > tt[k]? left : tt[k]; - for (rght = 0, k = p->qpos; k < p->b->core.l_qseq - END_SKIP; ++k) - rght = rght > tt[k]? left : tt[k]; - if (left >= MIN_MNP_FLANK_BAQ && rght >= MIN_MNP_FLANK_BAQ) break; // bracketed by good bases - } - if (i != n_plp[s]) break; - } - if (s == n) return -1; // there is no MNP at this position. - } - for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads - { // construct the MNP consensus - uint8_t *tt; - uint32_t *aux, x, *cnt; - int m = 0; - aux = calloc(N + 1, 4); - for (i = 0, x = 0; i < B2B_MAX_MNP; ++i) - x |= bam_nt16_nt4_table[bam_nt16_table[(int)ref[pos+i]]] << 2*i; - ref_seq = x; - bca->indelreg = 0; - for (s = 0; s < n; ++s) { - for (i = 0; i < n_plp[s]; ++i) { - bam_pileup1_t *p = plp[s] + i; - int stop; - if (p->qpos < END_SKIP || p->qpos >= p->b->core.l_qseq - END_SKIP) continue; - tt = bam1_seq(p->b); - for (k = j = stop = 0, x = 0; k < B2B_MAX_MNP; ++k) { - int c = bam_nt16_nt4_table[bam1_seqi(tt, p->qpos + k)]; - if (c > 3) break; - if (c != (ref_seq>>k*2&3) && !stop) ++j; - else stop = 1; - x |= c << 2*k; - } - if (k == B2B_MAX_MNP && j >= 2) aux[m++] = x; - } - } - if (m == 0) { - free(aux); return -1; - } - ks_introsort(uint32_t, m, aux); - // squeeze out indentical types - for (i = 1, n_types = 1; i < m; ++i) - if (aux[i] != aux[i-1]) ++n_types; - // count reads for each type - cnt = alloca(n_types * 4); - cnt[0] = 1<<8 | aux[0]; - for (i = 1, t = 0; i < m; ++i) { - if (aux[i] != aux[i-1]) { - ++t; - cnt[t] = 1<<8 | aux[i]; - } else cnt[t] += 1<<8; - } - free(aux); - // collect types; NOTE: we only collect ONE alternative type - ks_introsort(uint32_t, n_types, cnt); - if ((cnt[n_types-1]>>8) * MIN_SUPPORT_COEF < N) // no MNPs or too few supporting reads - return -1; - types = (int*)calloc(2, sizeof(int)); - bca->indel_types[0] = types[0] = ref_seq; - bca->indel_types[1] = types[1] = cnt[n_types-1]&0xff; - bca->indel_types[2] = bca->indel_types[3] = B2B_INDEL_NULL; - ref_type = 0; n_types = 2; - // calculate MNP length - for (i = 0; i < B2B_MAX_MNP; ++i) { - int c = types[0] >> 2*i & 3; - for (t = 1; t < n_types; ++t) - if ((types[t] >> 2*i & 3) != c) break; - if (t == n_types) break; - } - bca->indelreg = i; - if (bca->indelreg < 2) return -1; // should not happen in principle - x = (1<<2*bca->indelreg) - 1; - for (t = 0; t < n_types; ++t) types[t] &= x; - ref_seq &= x; - } - { // calling - for (s = 0; s < n; ++s) { - for (i = 0; i < n_plp[s]; ++i) { - bam_pileup1_t *p = plp[s] + i; - uint8_t *qq, *bq, *seq = bam1_seq(p->b); - uint32_t x, *cigar = bam1_cigar(p->b); - int y, skip = 0, baseQ = 0, baq = 0; - // try to get the original base quality - qq = bam1_qual(p->b); - bq = (uint8_t*)bam_aux_get(p->b, "ZQ"); - if (bq) ++bq; - // get the max original base quality - for (j = p->qpos, x = 0; j < p->b->core.l_qseq && j < p->qpos + bca->indelreg; ++j) { - int q = bq? qq[j] + (bq[j] - 64) : qq[j]; - int c = bam_nt16_nt4_table[bam1_seqi(seq, j)]; - baseQ = baseQ > q? baseQ : q; - if (c > 3) skip = 1; - x |= c << 2 * (j - p->qpos); - } - for (t = 0; t < n_types; ++t) - if (types[t] == x) break; - if (t == n_types) skip = 1; - else { // reapply BAQ - for (k = y = 0; k < p->b->core.n_cigar; ++k) { - int op = cigar[k]&0xf; - int len = cigar[k]>>4; - if (op == BAM_CMATCH) { - if (p->qpos >= y && p->qpos + bca->indelreg <= y + len) { - int left, rght; - for (j = y, left = 0; j <= p->qpos; ++j) - left = left > qq[j]? left : qq[j]; - for (j = p->qpos + bca->indelreg - 1, rght = 0; j < y + len; ++j) - rght = rght > qq[j]? rght : qq[j]; - baq = left < rght? left : rght; - break; - } - y += len; - } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += len; - } - } -// fprintf(stderr, "pos=%d read=%d:%d name=%s skip=%d reg=%d call=%d baseQ=%d baq=%d\n", pos+1, s, i, bam1_qname(p->b), skip, bca->indelreg, t, baseQ, baq); - baseQ = baseQ < baq? baseQ : baq; - p->aux = skip? 0 : (t<<16|baseQ<<8|baseQ); - } - } - } - // free - free(types); - return 0; -}