X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sam%2Fbam_index.c;h=f916e0461dca145499303db8edb011e6db86f793;hb=636b82d9f60ebcbec7ef1b73ba23bbbacfd8b36a;hp=328f011c16ccf4a9a558702317ecdecfbcc5212a;hpb=a97cc1d4f0111f7fe523227412a2147f7a763d56;p=rsem.git diff --git a/sam/bam_index.c b/sam/bam_index.c index 328f011..f916e04 100644 --- a/sam/bam_index.c +++ b/sam/bam_index.c @@ -159,9 +159,14 @@ bam_index_t *bam_index_core(bamFile fp) bam1_core_t *c; uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor; + h = bam_header_read(fp); + if(h == 0) { + fprintf(stderr, "[bam_index_core] Invalid BAM header."); + return NULL; + } + idx = (bam_index_t*)calloc(1, sizeof(bam_index_t)); b = (bam1_t*)calloc(1, sizeof(bam1_t)); - h = bam_header_read(fp); c = &b->core; idx->n = h->n_targets; @@ -172,19 +177,23 @@ bam_index_t *bam_index_core(bamFile fp) save_bin = save_tid = last_tid = last_bin = 0xffffffffu; save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu; - n_mapped = n_unmapped = n_no_coor = off_end = 0; + n_mapped = n_unmapped = n_no_coor = off_end = 0; off_beg = off_end = bam_tell(fp); while ((ret = bam_read1(fp, b)) >= 0) { if (c->tid < 0) ++n_no_coor; - if (last_tid != c->tid) { // change of chromosomes + if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes last_tid = c->tid; last_bin = 0xffffffffu; - } else if (last_coor > c->pos) { + } else if ((uint32_t)last_tid > (uint32_t)c->tid) { + fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n", + bam1_qname(b), last_tid+1, c->tid+1); + return NULL; + } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) { fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n", bam1_qname(b), last_coor, c->pos, c->tid+1); - exit(1); + return NULL; } - if (c->tid >= 0) insert_offset2(&idx->index2[b->core.tid], b, last_off); + if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off); if (c->bin != last_bin) { // then possibly write the binning index if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record insert_offset(idx->index[save_tid], save_bin, save_off, last_off); @@ -203,7 +212,7 @@ bam_index_t *bam_index_core(bamFile fp) if (bam_tell(fp) <= last_off) { fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n", (unsigned long long)bam_tell(fp), (unsigned long long)last_off); - exit(1); + return NULL; } if (c->flag & BAM_FUNMAP) ++n_unmapped; else ++n_mapped; @@ -217,8 +226,15 @@ bam_index_t *bam_index_core(bamFile fp) } merge_chunks(idx); fill_missing(idx); - if (ret >= 0) - while ((ret = bam_read1(fp, b)) >= 0) ++n_no_coor; + if (ret >= 0) { + while ((ret = bam_read1(fp, b)) >= 0) { + ++n_no_coor; + if (c->tid >= 0 && n_no_coor) { + fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n"); + return NULL; + } + } + } if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret); free(b->data); free(b); idx->n_no_coor = n_no_coor; @@ -448,6 +464,7 @@ bam_index_t *bam_index_load(const char *fn) strcat(strcpy(fnidx, fn), ".bai"); fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n"); download_from_remote(fnidx); + free(fnidx); idx = bam_index_load_local(fn); } if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n"); @@ -466,6 +483,10 @@ int bam_index_build2(const char *fn, const char *_fnidx) } idx = bam_index_core(fp); bam_close(fp); + if(idx == 0) { + fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n"); + return -1; + } if (_fnidx == 0) { fnidx = (char*)calloc(strlen(fn) + 5, 1); strcpy(fnidx, fn); strcat(fnidx, ".bai"); @@ -474,6 +495,7 @@ int bam_index_build2(const char *fn, const char *_fnidx) if (fpidx == 0) { fprintf(stderr, "[bam_index_build2] fail to create the index file.\n"); free(fnidx); + bam_index_destroy(idx); return -1; } bam_index_save(idx, fpidx);