#define MERGE_RG 1
#define MERGE_UNCOMP 2
#define MERGE_LEVEL1 4
+#define MERGE_FORCE 8
/*!
@abstract Merge multiple sorted BAM.
h->i = i;
h->b = (bam1_t*)calloc(1, sizeof(bam1_t));
if (bam_iter_read(fp[i], iter[i], h->b) >= 0) {
- h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)h->b->core.pos<<1 | bam1_strand(h->b);
+ h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam1_strand(h->b);
h->idx = idx++;
}
else h->pos = HEAP_EMPTY;
}
bam_write1_core(fpout, &b->core, b->data_len, b->data);
if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) {
- heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)b->core.pos<<1 | bam1_strand(b);
+ heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam1_strand(b);
heap->idx = idx++;
} else if (j == -1) {
heap->pos = HEAP_EMPTY;
int c, is_by_qname = 0, flag = 0, ret = 0;
char *fn_headers = NULL, *reg = 0;
- while ((c = getopt(argc, argv, "h:nru1R:")) >= 0) {
+ while ((c = getopt(argc, argv, "h:nru1R:f")) >= 0) {
switch (c) {
case 'r': flag |= MERGE_RG; break;
+ case 'f': flag |= MERGE_FORCE; break;
case 'h': fn_headers = strdup(optarg); break;
case 'n': is_by_qname = 1; break;
case '1': flag |= MERGE_LEVEL1; break;
fprintf(stderr, "Options: -n sort by read names\n");
fprintf(stderr, " -r attach RG tag (inferred from file names)\n");
fprintf(stderr, " -u uncompressed BAM output\n");
+ fprintf(stderr, " -f overwrite the output BAM if exist\n");
fprintf(stderr, " -1 compress level 1\n");
fprintf(stderr, " -R STR merge file in the specified region STR [all]\n");
fprintf(stderr, " -h FILE copy the header in FILE to <out.bam> [in1.bam]\n\n");
fprintf(stderr, " the header dictionary in merging.\n\n");
return 1;
}
+ if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) {
+ FILE *fp = fopen(argv[optind], "rb");
+ if (fp != NULL) {
+ fclose(fp);
+ fprintf(stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
+ return 1;
+ }
+ }
if (bam_merge_core(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg) < 0) ret = 1;
free(reg);
free(fn_headers);
{
if (g_is_by_qname) {
int t = strnum_cmp(bam1_qname(a), bam1_qname(b));
- return (t < 0 || (t == 0 && (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos))));
- } else return (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos));
+ return (t < 0 || (t == 0 && (((uint64_t)a->core.tid<<32|(a->core.pos+1)) < ((uint64_t)b->core.tid<<32|(b->core.pos+1)))));
+ } else return (((uint64_t)a->core.tid<<32|(a->core.pos+1)) < ((uint64_t)b->core.tid<<32|(b->core.pos+1)));
}
KSORT_INIT(sort, bam1_p, bam1_lt)
bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0);
}
+
+/*!
+  @abstract  Parse a memory-size string such as "500000000", "768M" or "2G".
+
+  @param  max_mem_string  NUL-terminated size: a decimal integer optionally
+                          followed by one suffix letter. K, M and G scale the
+                          number by 1024, 1024^2 and 1024^3; B means bytes.
+                          Suffixes are accepted in either case. The string is
+                          only read, never modified.
+  @return  The size in bytes. 0 is returned when the string is NULL, empty,
+           not a number, or not positive, so callers should treat 0 as
+           "invalid". A value too large for size_t saturates at the largest
+           size_t instead of wrapping around.
+ */
+size_t bam_sort_get_max_mem(char *max_mem_string)
+{
+	const size_t size_max = (size_t)-1;
+	size_t multiplier = 1;
+	size_t len;
+	long value;
+
+	if (max_mem_string == NULL) return 0;
+	len = strlen(max_mem_string);
+	if (len == 0) return 0; /* avoids indexing at len-1 on an empty string */
+
+	/* Only the last character selects the unit; each case deliberately
+	   falls through so that G = 1024 * M = 1024 * 1024 * K. */
+	switch (max_mem_string[len - 1]) {
+	case 'G': case 'g':
+		multiplier *= 1024;
+		/* fall through */
+	case 'M': case 'm':
+		multiplier *= 1024;
+		/* fall through */
+	case 'K': case 'k':
+		multiplier *= 1024;
+		break;
+	case 'B': case 'b': /* plain bytes: nothing to scale */
+	default:
+		break;
+	}
+
+	/* strtol stops at the first non-digit, so the suffix does not have to
+	   be stripped from the caller's buffer first. */
+	value = strtol(max_mem_string, NULL, 10);
+	if (value <= 0) return 0;
+
+	/* Guard the multiplication against size_t overflow. */
+	if ((unsigned long)value > size_max / multiplier) return size_max;
+	return multiplier * (size_t)value;
+}
+
int bam_sort(int argc, char *argv[])
{
size_t max_mem = 500000000;
switch (c) {
case 'o': is_stdout = 1; break;
case 'n': is_by_qname = 1; break;
- case 'm': max_mem = atol(optarg); break;
+ case 'm': max_mem = bam_sort_get_max_mem(optarg); break;
}
}
if (optind + 2 > argc) {