]> git.donarmstrong.com Git - samtools.git/blobdiff - bam2bcf_indel.c
Removed the pileup command
[samtools.git] / bam2bcf_indel.c
index c71280cf718b08f156fe27cadf4685180a92dcd4..f37a7831f9625be14b416510374fd5f71150cfdd 100644 (file)
@@ -3,12 +3,14 @@
 #include <string.h>
 #include "bam.h"
 #include "bam2bcf.h"
-#include "ksort.h"
 #include "kaln.h"
 #include "kprobaln.h"
 #include "khash.h"
 KHASH_SET_INIT_STR(rg)
 
+#include "ksort.h"
+KSORT_INIT_GENERIC(uint32_t)
+
 #define MINUS_CONST 0x10000000
 #define INDEL_WINDOW_SIZE 50
 
@@ -110,7 +112,6 @@ static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
 int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
                                          const void *rghash)
 {
-       extern void ks_introsort_uint32_t(int, uint32_t*);
        int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
        int N, K, l_run, ref_type, n_alt;
        char *inscns = 0, *ref2, *query, **ref_sample;
@@ -318,8 +319,14 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                        // align each read to ref2
                        for (i = 0; i < n_plp[s]; ++i, ++K) {
                                bam_pileup1_t *p = plp[s] + i;
-                               int qbeg, qend, tbeg, tend, sc;
+                               int qbeg, qend, tbeg, tend, sc, kk;
                                uint8_t *seq = bam1_seq(p->b);
+                               uint32_t *cigar = bam1_cigar(p->b);
+                               if (p->b->core.flag&4) continue; // unmapped reads
+                               // FIXME: the following loop should be better moved outside; nonetheless, realignment should be much slower anyway.
+                               for (kk = 0; kk < p->b->core.n_cigar; ++kk)
+                                       if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) break;
+                               if (kk < p->b->core.n_cigar) continue;
                                // FIXME: the following skips soft clips, but using them may be more sensitive.
                                // determine the start and end of sequences for alignment
                                qbeg = tpos2qpos(&p->b->core, bam1_cigar(p->b), left,  0, &tbeg);
@@ -414,9 +421,11 @@ int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_calla
                                indelQ2 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ2 + .499);
                                // pick the smaller between indelQ1 and indelQ2
                                indelQ = indelQ1 < indelQ2? indelQ1 : indelQ2;
-                               p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ;
+                               if (indelQ > 255) indelQ = 255;
+                               if (seqQ > 255) seqQ = 255;
+                               p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
                                sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
-//                             fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d q=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ);
+//                             fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
                        }
                }
                // determine bca->indel_types[] and bca->inscns