X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_import.c;h=fccaa022208131b27093a2b44f32e74d13a469c0;hb=3ddb3942053df00fdae714e77cbc2f5618db617e;hp=77e324319525b529ac456aad2e9d9f84769e7b2d;hpb=5453d1b09f745d2e23e6394c2d329d19da7db061;p=samtools.git diff --git a/bam_import.c b/bam_import.c index 77e3243..fccaa02 100644 --- a/bam_import.c +++ b/bam_import.c @@ -5,6 +5,7 @@ #include #include #include +#include "kstring.h" #include "bam.h" #include "kseq.h" #include "khash.h" @@ -146,6 +147,62 @@ static inline void append_text(bam_header_t *header, kstring_t *str) header->text[header->l_text] = 0; } +int sam_header_parse_rg(bam_header_t *h) +{ + kstring_t *rgid, *rglib; + char *p, *q, *s, *r; + int n = 0; + + // free + if (h == 0) return 0; + bam_strmap_destroy(h->rg2lib); h->rg2lib = 0; + if (h->l_text < 3) return 0; + // parse @RG lines + h->rg2lib = bam_strmap_init(); + rgid = calloc(1, sizeof(kstring_t)); + rglib = calloc(1, sizeof(kstring_t)); + s = h->text; + while ((s = strstr(s, "@RG")) != 0) { + if (rgid->l && rglib->l) { + bam_strmap_put(h->rg2lib, rgid->s, rglib->s); + ++n; + } + rgid->l = rglib->l = 0; + s += 3; + r = s; + if ((p = strstr(s, "ID:")) != 0) { + q = p + 3; + for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p); + kputsn(q, p - q, rgid); + } else { + fprintf(stderr, "[bam_header_parse] missing ID tag in @RG lines.\n"); + break; + } + if (r < p) r = p; + if ((p = strstr(s, "LB:")) != 0) { + q = p + 3; + for (p = q; *p && *p != '\t' && *p != '\r' && *p != '\n'; ++p); + kputsn(q, p - q, rglib); + } else { + fprintf(stderr, "[bam_header_parse] missing LB tag in @RG lines.\n"); + break; + } + if (r < p) r = p; + s = r + 3; + } + if (rgid->l && rglib->l) { + bam_strmap_put(h->rg2lib, rgid->s, rglib->s); + ++n; + } + free(rgid->s); free(rgid); + free(rglib->s); free(rglib); + if (n == 0) { + bam_strmap_destroy(h->rg2lib); + h->rg2lib = 0; + } + return n; +} + int sam_header_parse(bam_header_t *h) { int i; @@ -183,6 +240,7 @@ int sam_header_parse(bam_header_t *h) s = r + 3; ++i; } + sam_header_parse_rg(h); return h->n_targets; header_err_ret: