X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_index.c;h=347189d63e397d4d433cc42783aea3a07545e9e2;hb=3ddb3942053df00fdae714e77cbc2f5618db617e;hp=8c2555232772ee6571f0da19593c5b2764b04898;hpb=47c65ef17d8949832a0c79c552118b31154bc2ad;p=samtools.git diff --git a/bam_index.c b/bam_index.c index 8c25552..347189d 100644 --- a/bam_index.c +++ b/bam_index.c @@ -155,7 +155,8 @@ bam_index_t *bam_index_core(bamFile fp) last_tid = c->tid; last_bin = 0xffffffffu; } else if (last_coor > c->pos) { - fprintf(stderr, "[bam_index_core] the alignment is not sorted. Abort!\n"); + fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n", + bam1_qname(b), last_coor, c->pos, c->tid+1); exit(1); } if (b->core.tid >= 0 && b->core.bin < 4681) insert_offset2(&idx->index2[b->core.tid], b, last_off); @@ -268,7 +269,16 @@ bam_index_t *bam_index_load(const char *fn) fnidx = (char*)calloc(strlen(fn) + 5, 1); strcpy(fnidx, fn); strcat(fnidx, ".bai"); - if ((fp = fopen(fnidx, "r")) == 0) { + fp = fopen(fnidx, "r"); + if (fp == 0) { // try "{base}.bai" + char *s = strstr(fn, "bam"); + if (s == fn + strlen(fn) - 3) { + strcpy(fnidx, fn); + fnidx[strlen(fn)-1] = 'i'; + fp = fopen(fnidx, "r"); + } + } + if (fp == 0) { fprintf(stderr, "[bam_index_load] the alignment is not indexed. Please run `index' command first. Abort!\n"); exit(1); } @@ -327,7 +337,7 @@ bam_index_t *bam_index_load(const char *fn) return idx; } -int bam_index_build(const char *fn) +int bam_index_build2(const char *fn, const char *_fnidx) { char *fnidx; FILE *fpidx; @@ -336,9 +346,16 @@ int bam_index_build(const char *fn) assert(fp = bam_open(fn, "r")); idx = bam_index_core(fp); bam_close(fp); - fnidx = (char*)calloc(strlen(fn) + 5, 1); - strcpy(fnidx, fn); strcat(fnidx, ".bai"); - assert(fpidx = fopen(fnidx, "w")); + if (_fnidx == 0) { + fnidx = (char*)calloc(strlen(fn) + 5, 1); + strcpy(fnidx, fn); strcat(fnidx, ".bai"); + } else fnidx = strdup(_fnidx); + fpidx = fopen(fnidx, "w"); + if (fpidx == 0) { + fprintf(stderr, "[bam_index_build2] fail to create the index file.\n"); + free(fnidx); + return 1; + } bam_index_save(idx, fpidx); bam_index_destroy(idx); fclose(fpidx); @@ -346,13 +363,19 @@ int bam_index_build(const char *fn) return 0; } +int bam_index_build(const char *fn) +{ + return bam_index_build2(fn, 0); +} + int bam_index(int argc, char *argv[]) { if (argc < 2) { - fprintf(stderr, "Usage: samtools index \n"); + fprintf(stderr, "Usage: samtools index []\n"); return 1; } - bam_index_build(argv[1]); + if (argc >= 3) bam_index_build2(argv[1], argv[2]); + else bam_index_build(argv[1]); return 0; } @@ -374,7 +397,7 @@ static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[MAX_BIN]) static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b) { uint32_t rbeg = b->core.pos; - uint32_t rend = bam_calend(&b->core, bam1_cigar(b)); + uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1; return (rend > beg && rbeg < end); } @@ -410,16 +433,20 @@ int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, voi free(bins); { bam1_t *b; - int ret, n_seeks; + int l, ret, n_seeks; uint64_t curr_off; b = (bam1_t*)calloc(1, sizeof(bam1_t)); ks_introsort(off, n_off, off); - // resolve overlaps between adjecent blocks; this may happen due to the merge in indexing + // resolve completely contained adjacent blocks + for (i = 1, l = 0; i < n_off; ++i) + if (off[l].v < off[i].v) + off[++l] = off[i]; + n_off = l + 1; + // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing for (i = 1; i < n_off; ++i) if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u; { // merge adjacent blocks #if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16) - int l; for (i = 1, l = 0; i < n_off; ++i) { #ifdef BAM_TRUE_OFFSET if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;