git.donarmstrong.com Git - samtools.git/blobdiff - sam.c
* samtools-0.1.3-14 (r262)
[samtools.git] / sam.c
diff --git a/sam.c b/sam.c
index dd6cd3a8d8fa007bda51b897a59dd2694d25d3d7..671b5042bc17ec1948c83fe54093c03ff7065120 100644 (file)
--- a/sam.c
+++ b/sam.c
@@ -22,92 +22,61 @@ bam_header_t *bam_header_dup(const bam_header_t *h0)
        return h;
 }
 
-bam_header_t *bam_header_parse(const char *text)
-{
-       bam_header_t *h;
-       int i;
-       char *s, *p, *q, *r;
-
-       i = strlen(text);
-       if (i < 3) return 0; // headerless
-       h = bam_header_init();
-       h->l_text = i;
-       h->text = strdup(text);
-       s = h->text;
-       while ((s = strstr(s, "@SQ")) != 0) {
-               ++h->n_targets;
-               s += 3;
-       }
-       h->target_len = (uint32_t*)calloc(h->n_targets, 4);
-       h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
-       i = 0;
-       s = h->text;
-       while ((s = strstr(s, "@SQ")) != 0) {
-               s += 3;
-               r = s;
-               if ((p = strstr(s, "SN:")) != 0) {
-                       q = p + 3;
-                       for (p = q; *p && *p != '\t'; ++p);
-                       h->target_name[i] = (char*)calloc(p - q + 1, 1);
-                       strncpy(h->target_name[i], q, p - q);
-               } else goto header_err_ret;
-               if (r < p) r = p;
-               if ((p = strstr(s, "LN:")) != 0) h->target_len[i] = strtol(p + 3, 0, 10);
-               else goto header_err_ret;
-               if (r < p) r = p;
-               s = r + 3;
-               ++i;
-       }
-       if (h->n_targets == 0) {
-               bam_header_destroy(h);
-               return 0;
-       } else return h;
-
-header_err_ret:
-       fprintf(stderr, "[bam_header_parse] missing SN tag in a @SQ line.\n");
-       bam_header_destroy(h);
-       return 0;
-}
-
 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
 {
        samfile_t *fp;
        fp = (samfile_t*)calloc(1, sizeof(samfile_t));
-       if (mode[0] == 'r') {
-               const char *fn_list = (const char*)aux;
+       if (mode[0] == 'r') { // read
                fp->type |= TYPE_READ;
                if (mode[1] == 'b') { // binary
                        fp->type |= TYPE_BAM;
                        fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
                        if (fp->x.bam == 0) goto open_err_ret;
                        fp->header = bam_header_read(fp->x.bam);
-               } else {
+               } else { // text
                        fp->x.tamr = sam_open(fn);
                        if (fp->x.tamr == 0) goto open_err_ret;
-                       fp->header = sam_header_read2(fn_list);
+                       fp->header = sam_header_read(fp->x.tamr);
+                       if (fp->header->n_targets == 0) { // no @SQ fields
+                               if (aux) { // check if aux is present
+                                       bam_header_destroy(fp->header);
+                                       fp->header = sam_header_read2((const char*)aux);
+                               }
+                               if (fp->header->n_targets == 0)
+                                       fprintf(stderr, "[samopen] empty header.\n");
+                       } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
                }
-       } else if (mode[0] == 'w') {
+       } else if (mode[0] == 'w') { // write
                fp->header = bam_header_dup((const bam_header_t*)aux);
                if (mode[1] == 'b') { // binary
                        fp->type |= TYPE_BAM;
                        fp->x.bam = strcmp(fn, "-")? bam_open(fn, "w") : bam_dopen(fileno(stdout), "w");
                        if (fp->x.bam == 0) goto open_err_ret;
                        bam_header_write(fp->x.bam, fp->header);
-               } else {
-                       int i;
-                       bam_header_t *alt = 0;
-                       alt = bam_header_parse(fp->header->text);
+               } else { // text
+                       // open file
                        fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
                        if (fp->x.tamr == 0) goto open_err_ret;
-                       if (alt) {
-                               if (alt->n_targets != fp->header->n_targets)
-                                       fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
+                       // write header
+                       if (strstr(mode, "h")) {
+                               int i;
+                               bam_header_t *alt;
+                               // parse the header text 
+                               alt = bam_header_init();
+                               alt->l_text = fp->header->l_text; alt->text = fp->header->text;
+                               sam_header_parse(alt);
+                               alt->l_text = 0; alt->text = 0;
+                               // check if there are @SQ lines in the header
+                               if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
+                                       if (alt->n_targets != fp->header->n_targets)
+                                               fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
+                                       fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
+                               } else { // then dump ->target_{name,len}
+                                       for (i = 0; i < fp->header->n_targets; ++i)
+                                               fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
+                               }
                                bam_header_destroy(alt);
-                               fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
                        }
-                       if (strstr(mode, "h")) // print header
-                               for (i = 0; i < fp->header->n_targets; ++i)
-                                       fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
                }
        }
        return fp;