git.donarmstrong.com Git - samtools.git/commitdiff
Verify old BIN value of reads during 'samtools index'.
author peterjc <p.j.a.cock@googlemail.com>
Fri, 19 Apr 2013 14:13:32 +0000 (15:13 +0100)
committer peterjc <p.j.a.cock@googlemail.com>
Fri, 19 Apr 2013 14:13:32 +0000 (15:13 +0100)
This simple check catches invalid BIN values in reads, which can cause
subtle bugs when position-based retrieval misses data, and simple
out-of-bounds errors if a BIN is used beyond the length of the reference.

bam_index.c

index f916e0461dca145499303db8edb011e6db86f793..35fb5b666e6d748804091d979342d1b67566412e 100644 (file)
@@ -154,7 +154,7 @@ bam_index_t *bam_index_core(bamFile fp)
        bam_header_t *h;
        int i, ret;
        bam_index_t *idx;
-       uint32_t last_bin, save_bin;
+       uint32_t last_bin, save_bin, recalculated_bin;
        int32_t last_coor, last_tid, save_tid;
        bam1_core_t *c;
        uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
@@ -193,6 +193,15 @@ bam_index_t *bam_index_core(bamFile fp)
                                        bam1_qname(b), last_coor, c->pos, c->tid+1);
                        return NULL;
                }
+               if (c->tid >= 0) {
+                       recalculated_bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b)));
+                       if (c->bin != recalculated_bin) {
+                               fprintf(stderr, "[bam_index_core] read '%s' mapped at POS %d has BIN %d but should be %d\n",
+                                       bam1_qname(b), c->pos + 1, c->bin, recalculated_bin);
+                               fprintf(stderr, "[bam_index_core] Fix it by using BAM->SAM->BAM to force a recalculation of the BIN field\n");
+                               return NULL;
+                       }
+               }
                if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off);
                if (c->bin != last_bin) { // then possibly write the binning index
                        if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record