git.donarmstrong.com Git - samtools.git/blobdiff - bam_sort.c
added the phase command
[samtools.git] / bam_sort.c
index 4745be6cbf003c6a46a7882762c740c25493f932..38f15d655c253dc1f7e8cd50dd39c178c448f584 100644 (file)
@@ -1,6 +1,7 @@
 #include <stdlib.h>
 #include <ctype.h>
 #include <assert.h>
+#include <errno.h>
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
@@ -51,6 +52,14 @@ static inline int heap_lt(const heap1_t a, const heap1_t b)
 
 KSORT_INIT(heap, heap1_t, heap_lt)
 
+static void swap_header_targets(bam_header_t *h1, bam_header_t *h2) // exchange the n_targets, target_name and target_len fields of h1 and h2; no other header field is touched
+{
+       bam_header_t t; // scratch holder; only the three fields assigned below are ever set or read
+       t.n_targets = h1->n_targets, h1->n_targets = h2->n_targets, h2->n_targets = t.n_targets; // swap the target counts
+       t.target_name = h1->target_name, h1->target_name = h2->target_name, h2->target_name = t.target_name; // swap the target_name members by assignment; the names themselves are not copied
+       t.target_len = h1->target_len, h1->target_len = h2->target_len, h2->target_len = t.target_len; // swap the target_len members the same way
+}
+
 static void swap_header_text(bam_header_t *h1, bam_header_t *h2)
 {
        int tempi;
@@ -89,11 +98,12 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
        if (headers) {
                tamFile fpheaders = sam_open(headers);
                if (fpheaders == 0) {
-                       fprintf(stderr, "[bam_merge_core] Cannot open file `%s'. Continue anyway.\n", headers);
-               } else {
-                       hheaders = sam_header_read(fpheaders);
-                       sam_close(fpheaders);
+                       const char *message = strerror(errno);
+                       fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
+                       return -1;
                }
+               hheaders = sam_header_read(fpheaders);
+               sam_close(fpheaders);
        }
 
        g_is_by_qname = by_qname;
@@ -128,39 +138,51 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
                        return -1;
                }
                hin = bam_header_read(fp[i]);
-               if (i == 0) { // the first SAM
+               if (i == 0) { // the first BAM
                        hout = hin;
-                       if (hheaders) {
-                               // If the text headers to be swapped in include any @SQ headers,
-                               // check that they are consistent with the existing binary list
-                               // of reference information.
-                               if (hheaders->n_targets > 0) {
-                                       if (hout->n_targets != hheaders->n_targets)
-                                               fprintf(stderr, "[bam_merge_core] number of @SQ headers in `%s' differs from number of target sequences", headers);
-                                       for (j = 0; j < hout->n_targets; ++j)
-                                               if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0)
-                                                       fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence", hheaders->target_name[j], headers);
-                               }
-                               swap_header_text(hout, hheaders);
-                               bam_header_destroy(hheaders);
-                               hheaders = NULL;
-                       }
                } else { // validate multiple baf
-                       if (hout->n_targets != hin->n_targets) {
-                               fprintf(stderr, "[bam_merge_core] file '%s' has different number of target sequences. Abort!\n", fn[i]);
-                               exit(1);
-                       }
-                       for (j = 0; j < hout->n_targets; ++j) {
-                               if (strcmp(hout->target_name[j], hin->target_name[j])) {
-                                       fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'. Abort!\n",
+                       int min_n_targets = hout->n_targets;
+                       if (hin->n_targets < min_n_targets) min_n_targets = hin->n_targets;
+
+                       for (j = 0; j < min_n_targets; ++j)
+                               if (strcmp(hout->target_name[j], hin->target_name[j]) != 0) {
+                                       fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'\n",
                                                        hout->target_name[j], hin->target_name[j], fn[i]);
-                                       exit(1);
+                                       return -1;
                                }
+
+                       // If this input file has additional target reference sequences,
+                       // add them to the headers to be output
+                       if (hin->n_targets > hout->n_targets) {
+                               swap_header_targets(hout, hin);
+                               // FIXME Possibly we should also create @SQ text headers
+                               // for the newly added reference sequences
                        }
+
                        bam_header_destroy(hin);
                }
        }
 
+       if (hheaders) {
+               // If the text headers to be swapped in include any @SQ headers,
+               // check that they are consistent with the existing binary list
+               // of reference information.
+               if (hheaders->n_targets > 0) {
+                       if (hout->n_targets != hheaders->n_targets) {
+                               fprintf(stderr, "[bam_merge_core] number of @SQ headers in '%s' differs from number of target sequences\n", headers);
+                               if (!reg) return -1;
+                       }
+                       for (j = 0; j < hout->n_targets; ++j)
+                               if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0) {
+                                       fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence\n", hheaders->target_name[j], headers);
+                                       if (!reg) return -1;
+                               }
+               }
+
+               swap_header_text(hout, hheaders);
+               bam_header_destroy(hheaders);
+       }
+
        if (reg) {
                int tid, beg, end;
                if (bam_parse_region(hout, reg, &tid, &beg, &end) < 0) {
@@ -200,8 +222,11 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
        ks_heapmake(heap, n, heap);
        while (heap->pos != HEAP_EMPTY) {
                bam1_t *b = heap->b;
-               if ((flag & MERGE_RG) && bam_aux_get(b, "RG") == 0)
+               if (flag & MERGE_RG) {
+                       uint8_t *rg = bam_aux_get(b, "RG");
+                       if (rg) bam_aux_del(b, rg);
                        bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
+               }
                bam_write1_core(fpout, &b->core, b->data_len, b->data);
                if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) {
                        heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)b->core.pos<<1 | bam1_strand(b);
@@ -229,7 +254,7 @@ int bam_merge_core(int by_qname, const char *out, const char *headers, int n, ch
 
 int bam_merge(int argc, char *argv[])
 {
-       int c, is_by_qname = 0, flag = 0;
+       int c, is_by_qname = 0, flag = 0, ret = 0;
        char *fn_headers = NULL, *reg = 0;
 
        while ((c = getopt(argc, argv, "h:nruR:")) >= 0) {
@@ -254,10 +279,10 @@ int bam_merge(int argc, char *argv[])
                fprintf(stderr, "      the header dictionary in merging.\n\n");
                return 1;
        }
-       bam_merge_core(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg);
+       if (bam_merge_core(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg) < 0) ret = 1;
        free(reg);
        free(fn_headers);
-       return 0;
+       return ret;
 }
 
 typedef bam1_t *bam1_p;