]> git.donarmstrong.com Git - samtools.git/blobdiff - bam2bcf_indel.c
* samtools-0.1.10-4 (r840)
[samtools.git] / bam2bcf_indel.c
index 4f747b156f439bf5d6e516cea3856bf5054e831c..819c1e7b1371296b7da5536939fab691f53c1722 100644 (file)
@@ -408,153 +408,3 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
        free(types); free(inscns);
        return n_alt > 0? 0 : -1;
 }
-
-#define END_SKIP 4 // must be larger than B2B_MAX_MNP
-#define MIN_MNP_FLANK_BAQ 20
-
-int bcf_call_mnp_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref)
-{
-       extern void ks_introsort_uint32_t(int, uint32_t*);
-       int i, s, j, k, t, n_types, *types;
-       int N, ref_seq, ref_type;
-       if (ref == 0 || bca == 0) return -1;
-       if (pos < bca->last_mnp_pos + B2B_MNP_WIN) return -2; // to avoid calling a TNP multiple times
-       if (pos < END_SKIP || ref[pos] == 0 || ref[pos+1] == 0) return -2; // end of the reference
-       { // determine if there is an MNP
-               int r[2];
-               uint8_t *tt;
-               r[0] = bam_nt16_table[(int)ref[pos]];
-               r[1] = bam_nt16_table[(int)ref[pos+1]];
-               for (s = 0; s < n; ++s) {
-                       for (i = 0; i < n_plp[s]; ++i) {
-                               bam_pileup1_t *p = plp[s] + i;
-                               int left, rght;
-                               if (p->qpos < END_SKIP || p->qpos >= p->b->core.l_qseq - END_SKIP) continue;
-                               tt = bam1_seq(p->b);
-                               if (bam1_seqi(tt, p->qpos) == r[0] || bam1_seqi(tt, p->qpos+1) == r[1]) continue; // no MNP
-                               tt = bam1_qual(p->b);
-                               for (left = 0, k = p->qpos - END_SKIP; k <= p->qpos; ++k)
-                                       left = left > tt[k]? left : tt[k];
-                               for (rght = 0, k = p->qpos; k < p->b->core.l_qseq - END_SKIP; ++k)
-                                       rght = rght > tt[k]? left : tt[k];
-                               if (left >= MIN_MNP_FLANK_BAQ && rght >= MIN_MNP_FLANK_BAQ) break; // bracketed by good bases
-                       }
-                       if (i != n_plp[s]) break;
-               }
-               if (s == n) return -1; // there is no MNP at this position.
-       }
-       for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads
-       { // construct the MNP consensus
-               uint8_t *tt;
-               uint32_t *aux, x, *cnt;
-               int m = 0;
-               aux = calloc(N + 1, 4);
-               for (i = 0, x = 0; i < B2B_MAX_MNP; ++i)
-                       x |= bam_nt16_nt4_table[bam_nt16_table[(int)ref[pos+i]]] << 2*i;
-               ref_seq = x;
-               bca->indelreg = 0;
-               for (s = 0; s < n; ++s) {
-                       for (i = 0; i < n_plp[s]; ++i) {
-                               bam_pileup1_t *p = plp[s] + i;
-                               int stop;
-                               if (p->qpos < END_SKIP || p->qpos >= p->b->core.l_qseq - END_SKIP) continue;
-                               tt = bam1_seq(p->b);
-                               for (k = j = stop = 0, x = 0; k < B2B_MAX_MNP; ++k) {
-                                       int c = bam_nt16_nt4_table[bam1_seqi(tt, p->qpos + k)];
-                                       if (c > 3) break;
-                                       if (c != (ref_seq>>k*2&3) && !stop) ++j;
-                                       else stop = 1;
-                                       x |= c << 2*k;
-                               }
-                               if (k == B2B_MAX_MNP && j >= 2) aux[m++] = x;
-                       }
-               }
-               if (m == 0) {
-                       free(aux); return -1;
-               }
-               ks_introsort(uint32_t, m, aux);
-               // squeeze out indentical types
-               for (i = 1, n_types = 1; i < m; ++i)
-                       if (aux[i] != aux[i-1]) ++n_types;
-               // count reads for each type
-               cnt = alloca(n_types * 4);
-               cnt[0] = 1<<8 | aux[0];
-               for (i = 1, t = 0; i < m; ++i) {
-                       if (aux[i] != aux[i-1]) {
-                               ++t;
-                               cnt[t] = 1<<8 | aux[i];
-                       } else cnt[t] += 1<<8;
-               }
-               free(aux);
-               // collect types; NOTE: we only collect ONE alternative type
-               ks_introsort(uint32_t, n_types, cnt);
-               if ((cnt[n_types-1]>>8) * MIN_SUPPORT_COEF < N) // no MNPs or too few supporting reads
-                       return -1;
-               types = (int*)calloc(2, sizeof(int));
-               bca->indel_types[0] = types[0] = ref_seq;
-               bca->indel_types[1] = types[1] = cnt[n_types-1]&0xff;
-               bca->indel_types[2] = bca->indel_types[3] = B2B_INDEL_NULL;
-               ref_type = 0; n_types = 2;
-               // calculate MNP length
-               for (i = 0; i < B2B_MAX_MNP; ++i) {
-                       int c = types[0] >> 2*i & 3;
-                       for (t = 1; t < n_types; ++t)
-                               if ((types[t] >> 2*i & 3) != c) break;
-                       if (t == n_types) break;
-               }
-               bca->indelreg = i;
-               if (bca->indelreg < 2) return -1; // should not happen in principle
-               x = (1<<2*bca->indelreg) - 1;
-               for (t = 0; t < n_types; ++t) types[t] &= x;
-               ref_seq &= x;
-       }
-       { // calling
-               for (s = 0; s < n; ++s) {
-                       for (i = 0; i < n_plp[s]; ++i) {
-                               bam_pileup1_t *p = plp[s] + i;
-                               uint8_t *qq, *bq, *seq = bam1_seq(p->b);
-                               uint32_t x, *cigar = bam1_cigar(p->b);
-                               int y, skip = 0, baseQ = 0, baq = 0;
-                               // try to get the original base quality
-                               qq = bam1_qual(p->b);
-                               bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
-                               if (bq) ++bq;
-                               // get the max original base quality
-                               for (j = p->qpos, x = 0; j < p->b->core.l_qseq && j < p->qpos + bca->indelreg; ++j) {
-                                       int q = bq? qq[j] + (bq[j] - 64) : qq[j];
-                                       int c = bam_nt16_nt4_table[bam1_seqi(seq, j)];
-                                       baseQ = baseQ > q? baseQ : q;
-                                       if (c > 3) skip = 1;
-                                       x |= c << 2 * (j - p->qpos);
-                               }
-                               for (t = 0; t < n_types; ++t)
-                                       if (types[t] == x) break;
-                               if (t == n_types) skip = 1;
-                               else { // reapply BAQ
-                                       for (k = y = 0; k < p->b->core.n_cigar; ++k) {
-                                               int op = cigar[k]&0xf;
-                                               int len = cigar[k]>>4;
-                                               if (op == BAM_CMATCH) {
-                                                       if (p->qpos >= y && p->qpos + bca->indelreg <= y + len) {
-                                                               int left, rght;
-                                                               for (j = y, left = 0; j <= p->qpos; ++j)
-                                                                       left = left > qq[j]? left : qq[j];
-                                                               for (j = p->qpos + bca->indelreg - 1, rght = 0; j < y + len; ++j)
-                                                                       rght = rght > qq[j]? rght : qq[j];
-                                                               baq = left < rght? left : rght;
-                                                               break;
-                                                       }
-                                                       y += len;
-                                               } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += len;
-                                       }
-                               }
-//                             fprintf(stderr, "pos=%d read=%d:%d name=%s skip=%d reg=%d call=%d baseQ=%d baq=%d\n", pos+1, s, i, bam1_qname(p->b), skip, bca->indelreg, t, baseQ, baq);
-                               baseQ = baseQ < baq? baseQ : baq;
-                               p->aux = skip? 0 : (t<<16|baseQ<<8|baseQ);
-                       }
-               }
-       }
-       // free
-       free(types);
-       return 0;
-}