X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_sort.c;h=402792aea44952a5962e340e9a0efbad75654494;hb=1a23a35867ee992dcffcf3d17df2b7d41732f33b;hp=c5ed5835b0c26cb4e3ed0a756af26ee8f6d76ef6;hpb=f93dae0d03856955f9424e8b2aaf261304ca647e;p=samtools.git diff --git a/bam_sort.c b/bam_sort.c index c5ed583..402792a 100644 --- a/bam_sort.c +++ b/bam_sort.c @@ -47,6 +47,16 @@ static inline int heap_lt(const heap1_t a, const heap1_t b) KSORT_INIT(heap, heap1_t, heap_lt) +/*! + @abstract Merge multiple sorted BAM. + @param by_qname whether to sort by query name + @param out output BAM file name + @param n number of files to be merged + @param fn names of files to be merged + + @discussion Padding information may NOT be correctly maintained. This + function is NOT thread safe. + */ void bam_merge_core(int by_qname, const char *out, int n, char * const *fn) { bamFile fpout, *fp; @@ -66,13 +76,17 @@ void bam_merge_core(int by_qname, const char *out, int n, char * const *fn) else { // validate multiple baf if (hout->n_targets != hin->n_targets) { fprintf(stderr, "[bam_merge_core] file '%s' has different number of target sequences. Abort!\n", fn[i]); - abort(); + exit(1); } for (j = 0; j < hout->n_targets; ++j) { - if (strcmp(hout->target_name[j], hin->target_name[j]) || hout->target_len[j] != hin->target_len[j]) { - fprintf(stderr, "[bam_merge_core] file '%s' has a different target sequence. Abort!\n", fn[i]); - abort(); + if (strcmp(hout->target_name[j], hin->target_name[j])) { + fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'. Abort!\n", + hout->target_name[j], hin->target_name[j], fn[i]); + exit(1); } + if (hout->target_len[j] != hin->target_len[j]) + fprintf(stderr, "[bam_merge_core] different target sequence length: %d != %d in file '%s'. 
Continue.\n", + hout->target_len[j], hin->target_len[j], fn[i]); } bam_header_destroy(hin); } @@ -115,7 +129,7 @@ int bam_merge(int argc, char *argv[]) case 'n': is_by_qname = 1; break; } } - if (optind + 3 >= argc) { + if (optind + 2 >= argc) { fprintf(stderr, "Usage: samtools merge [-n] [...]\n"); return 1; } @@ -151,6 +165,20 @@ static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam bam_close(fp); } +/*! + @abstract Sort an unsorted BAM file based on the chromosome order + and the leftmost position of an alignment + + @param is_by_qname whether to sort by query name + @param fn name of the file to be sorted + @param prefix prefix of the output and the temporary files; upon + success, prefix.bam will be written. + @param max_mem approximate maximum memory (very inaccurate) + + @discussion It may create multiple temporary subalignment files + and then merge them by calling bam_merge_core(). This function is + NOT thread safe. + */ void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem) { int n, ret, k, i; @@ -191,7 +219,7 @@ void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t m fns[i] = (char*)calloc(strlen(prefix) + 20, 1); sprintf(fns[i], "%s.%.4d.bam", prefix, i); } - bam_merge_core(0, fnout, n, fns); + bam_merge_core(is_by_qname, fnout, n, fns); free(fnout); for (i = 0; i < n; ++i) { unlink(fns[i]); @@ -221,7 +249,7 @@ int bam_sort(int argc, char *argv[]) } } if (optind + 2 > argc) { - fprintf(stderr, "Usage: samtools sort [-n] [-m ] \n"); + fprintf(stderr, "Usage: samtools sort [-n] [-m ] \n"); return 1; } bam_sort_core(is_by_qname, argv[optind], argv[optind+1], max_mem);