X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=sam.c;h=5b02abbc94daad6f5eb7b19f596fba6d9dc54109;hb=1a23a35867ee992dcffcf3d17df2b7d41732f33b;hp=85343398a8d5356a7ce1343970653adb5204a652;hpb=7a36e408af60d3a48eb8e2fda313f4ee748ad0fc;p=samtools.git

diff --git a/sam.c b/sam.c
index 8534339..5b02abb 100644
--- a/sam.c
+++ b/sam.c
@@ -19,94 +19,95 @@ bam_header_t *bam_header_dup(const bam_header_t *h0)
 		h->target_len[i] = h0->target_len[i];
 		h->target_name[i] = strdup(h0->target_name[i]);
 	}
+	if (h0->rg2lib) h->rg2lib = bam_strmap_dup(h0->rg2lib);
 	return h;
 }
 
-bam_header_t *bam_header_parse(const char *text)
-{
-	bam_header_t *h;
-	int i;
-	char *s, *p, *q, *r;
-
-	i = strlen(text);
-	if (i < 3) return 0; // headerless
-	h = bam_header_init();
-	h->l_text = i;
-	h->text = strdup(text);
-	s = h->text;
-	while ((s = strstr(s, "@SQ")) != 0) {
-		++h->n_targets;
-		s += 3;
-	}
-	h->target_len = (uint32_t*)calloc(h->n_targets, 4);
-	h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
-	i = 0;
-	s = h->text;
-	while ((s = strstr(s, "@SQ")) != 0) {
-		s += 3;
-		r = s;
-		if ((p = strstr(s, "SN:")) != 0) {
-			q = p + 3;
-			for (p = q; *p && *p != '\t'; ++p);
-			h->target_name[i] = (char*)calloc(p - q + 1, 1);
-			strncpy(h->target_name[i], q, p - q);
-		} else goto header_err_ret;
-		if (r < p) r = p;
-		if ((p = strstr(s, "LN:")) != 0) h->target_len[i] = strtol(p + 3, 0, 10);
-		else goto header_err_ret;
-		if (r < p) r = p;
-		s = r + 3;
-		++i;
-	}
-	if (h->n_targets == 0) {
-		bam_header_destroy(h);
-		return 0;
-	} else return h;
-
-header_err_ret:
-	fprintf(stderr, "[bam_header_parse] missing SN tag in a @SQ line.\n");
-	bam_header_destroy(h);
-	return 0;
-}
-
+/*
+ * Open a SAM (text) or BAM (binary) file for reading or writing.
+ *
+ * fn:   file name. "-" selects stdin for binary reads and stdout for
+ *       writes; for text reads fn is handed to sam_open() unchanged.
+ * mode: mode[0] is 'r' (read) or 'w' (write); mode[1] == 'b' selects BAM,
+ *       anything else selects text. When writing, a 'u' anywhere in mode
+ *       opens the BAM stream with mode "wu", and an 'h' anywhere in mode
+ *       makes text output start with the header.
+ * aux:  text read: optional (may be 0) argument for sam_header_read2(),
+ *       consulted only when the file itself carries no @SQ lines;
+ *       write: the const bam_header_t* to duplicate into fp->header;
+ *       binary read: unused.
+ *
+ * Returns the new handle, or 0 when allocation or opening fails. A mode
+ * that starts with neither 'r' nor 'w' yields a zero-filled handle.
+ */
 samfile_t *samopen(const char *fn, const char *mode, const void *aux)
 {
 	samfile_t *fp;
 	fp = (samfile_t*)calloc(1, sizeof(samfile_t));
+	if (fp == 0) return 0; // out of memory: nothing to clean up yet
-	if (mode[0] == 'r') {
-		const char *fn_list = (const char*)aux;
+	if (mode[0] == 'r') { // read
 		fp->type |= TYPE_READ;
 		if (mode[1] == 'b') { // binary
 			fp->type |= TYPE_BAM;
 			fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
+			if (fp->x.bam == 0) goto open_err_ret;
 			fp->header = bam_header_read(fp->x.bam);
-		} else {
+		} else { // text
 			fp->x.tamr = sam_open(fn);
-			fp->header = sam_header_read2(fn_list);
+			if (fp->x.tamr == 0) goto open_err_ret;
+			fp->header = sam_header_read(fp->x.tamr);
+			if (fp->header->n_targets == 0) { // no @SQ fields
+				if (aux) { // check if aux is present
+					bam_header_destroy(fp->header);
+					fp->header = sam_header_read2((const char*)aux);
+				}
+				if (fp->header->n_targets == 0)
+					fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
+			} else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
 		}
-	} else if (mode[0] == 'w') {
+		sam_header_parse_rg(fp->header);
+	} else if (mode[0] == 'w') { // write
 		fp->header = bam_header_dup((const bam_header_t*)aux);
 		if (mode[1] == 'b') { // binary
+			char bmode[3];
+			bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0;
 			fp->type |= TYPE_BAM;
-			fp->x.bam = strcmp(fn, "-")? bam_open(fn, "w") : bam_dopen(fileno(stdout), "w");
+			fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
+			if (fp->x.bam == 0) goto open_err_ret;
 			bam_header_write(fp->x.bam, fp->header);
-		} else {
-			int i;
-			bam_header_t *alt = 0;
-			alt = bam_header_parse(fp->header->text);
+		} else { // text
+			// open file
 			fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
-			if (alt) {
-				if (alt->n_targets != fp->header->n_targets)
-					fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
-				bam_header_destroy(alt);
+			if (fp->x.tamw == 0) goto open_err_ret; // test the member that was just assigned
+			// write header
+			if (strstr(mode, "h")) {
+				int i;
+				bam_header_t *alt;
+				// parse the header text
+				alt = bam_header_init();
+				alt->l_text = fp->header->l_text; alt->text = fp->header->text;
+				sam_header_parse(alt);
+				alt->l_text = 0; alt->text = 0; // text is only borrowed from fp->header: detach it before alt is destroyed
+				// check if there are @SQ lines in the header
 				fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw);
+				if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
+					if (alt->n_targets != fp->header->n_targets)
+						fprintf(stderr, "[samopen] inconsistent number of target sequences.\n");
+				} else { // then dump ->target_{name,len}
+					for (i = 0; i < fp->header->n_targets; ++i)
+						fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%u\n", fp->header->target_name[i], (unsigned)fp->header->target_len[i]);
+				}
+				bam_header_destroy(alt);
 			}
-			if (strstr(mode, "h")) // print header
-				for (i = 0; i < fp->header->n_targets; ++i)
-					fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
 		}
 	}
 	return fp;
+
+open_err_ret:
+	// write mode duplicates the header before opening the file, so release it here
+	if (fp->header) bam_header_destroy(fp->header);
+	free(fp);
+	return 0;
 }
 
 void samclose(samfile_t *fp)