index = idx->index[i];
for (k = kh_begin(index); k != kh_end(index); ++k) {
bam_binlist_t *p;
- if (!kh_exist(index, k)) continue;
+ if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;
p = &kh_value(index, k);
m = 0;
for (l = 1; l < p->n; ++l) {
bam1_core_t *c;
uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
+ h = bam_header_read(fp);
+ if(h == 0) {
+ fprintf(stderr, "[bam_index_core] Invalid BAM header.");
+ return NULL;
+ }
+
idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
b = (bam1_t*)calloc(1, sizeof(bam1_t));
- h = bam_header_read(fp);
c = &b->core;
idx->n = h->n_targets;
save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu;
- n_mapped = n_unmapped = n_no_coor = off_end = 0;
+ n_mapped = n_unmapped = n_no_coor = off_end = 0;
off_beg = off_end = bam_tell(fp);
while ((ret = bam_read1(fp, b)) >= 0) {
if (c->tid < 0) ++n_no_coor;
- if (last_tid != c->tid) { // change of chromosomes
+ if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes
last_tid = c->tid;
last_bin = 0xffffffffu;
- } else if (last_coor > c->pos) {
+ } else if ((uint32_t)last_tid > (uint32_t)c->tid) {
+ fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n",
+ bam1_qname(b), last_tid+1, c->tid+1);
+ return NULL;
+ } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) {
fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n",
bam1_qname(b), last_coor, c->pos, c->tid+1);
- exit(1);
+ return NULL;
}
- if (c->tid >= 0) insert_offset2(&idx->index2[b->core.tid], b, last_off);
+ if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off);
if (c->bin != last_bin) { // then possibly write the binning index
if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
if (bam_tell(fp) <= last_off) {
fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n",
(unsigned long long)bam_tell(fp), (unsigned long long)last_off);
- exit(1);
+ return NULL;
}
if (c->flag & BAM_FUNMAP) ++n_unmapped;
else ++n_mapped;
}
if (save_tid >= 0) {
insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
- insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
+ insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, bam_tell(fp));
insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
}
merge_chunks(idx);
fill_missing(idx);
- if (ret >= 0)
- while ((ret = bam_read1(fp, b)) >= 0) ++n_no_coor;
+ if (ret >= 0) {
+ while ((ret = bam_read1(fp, b)) >= 0) {
+ ++n_no_coor;
+ if (c->tid >= 0 && n_no_coor) {
+ fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n");
+ return NULL;
+ }
+ }
+ }
if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret);
free(b->data); free(b);
idx->n_no_coor = n_no_coor;
}
idx = bam_index_core(fp);
bam_close(fp);
+ if(idx == 0) {
+ fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n");
+ return -1;
+ }
if (_fnidx == 0) {
fnidx = (char*)calloc(strlen(fn) + 5, 1);
strcpy(fnidx, fn); strcat(fnidx, ".bai");
bam_index_t *idx;
bam_header_t *header;
bamFile fp;
- int i, no_stats = 0;
+ int i;
if (argc < 2) {
fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
return 1;
k = kh_get(i, h, BAM_MAX_BIN);
if (k != kh_end(h))
printf("\t%llu\t%llu", (long long)kh_val(h, k).list[1].u, (long long)kh_val(h, k).list[1].v);
- else no_stats = 1;
+ else printf("\t0\t0");
putchar('\n');
}
- printf("*\t0");
- if (!no_stats) printf("\t0\t%llu", (long long)idx->n_no_coor);
- putchar('\n');
+ printf("*\t0\t0\t%llu\n", (long long)idx->n_no_coor);
bam_header_destroy(header);
bam_index_destroy(idx);
return 0;
}
}
free(bins);
+ if (n_off == 0) {
+ free(off); return iter;
+ }
{
bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
int l;
int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
{
- if (iter->finished) return -1;
- if (iter->from_first) {
- int ret = bam_read1(fp, b);
- if (ret < 0) iter->finished = 1;
+ int ret; // result of the most recent bam_read1(); returned to the caller when iteration stops
+ if (iter && iter->finished) return -1; // an iterator already marked finished yields nothing more
+ if (iter == 0 || iter->from_first) { // NULL iterator or from_first: read the next record without using chunk offsets
+ ret = bam_read1(fp, b);
+ if (ret < 0 && iter) iter->finished = 1; // only a non-NULL iterator can record that reading stopped
return ret;
}
if (iter->off == 0) return -1;
for (;;) {
- int ret;
if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
- if (iter->i == iter->n_off - 1) break; // no more chunks
+ if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks; set ret so the final return reports -1
if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
}
++iter->i;
}
- if ((ret = bam_read1(fp, b)) > 0) {
+ if ((ret = bam_read1(fp, b)) >= 0) { // a return of 0 is now also treated as a successfully read record
iter->curr_off = bam_tell(fp);
- if (b->core.tid != iter->tid || b->core.pos >= iter->end) break; // no need to proceed
+ if (b->core.tid != iter->tid || b->core.pos >= iter->end) { // record is on another reference or starts at/after the region end; no need to proceed
+ ret = bam_validate1(NULL, b)? -1 : -5; // determine whether end of region or error: -1 if bam_validate1() returns nonzero, -5 otherwise
+ break;
+ }
else if (is_overlap(iter->beg, iter->end, b)) return ret;
- } else break; // end of file
+ } else break; // end of file or error; ret keeps the negative value from bam_read1()
}
iter->finished = 1;
- return -1;
+ return ret;
}
int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
{
+ int ret; // last status from bam_iter_read(); decides the return value below
bam_iter_t iter;
bam1_t *b;
b = bam_init1();
iter = bam_iter_query(idx, tid, beg, end);
- while (bam_iter_read(fp, iter, b) >= 0) func(b, data);
+ while ((ret = bam_iter_read(fp, iter, b)) >= 0) func(b, data); // invoke the callback once per record the iterator returns
+ bam_iter_destroy(iter); // release the iterator obtained from bam_iter_query()
bam_destroy1(b);
- return 0;
+ return (ret == -1)? 0 : ret; // -1 is reported as success (0); any other negative status is passed through
}