X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_aux.c;h=a63e2aeadccf35e95b786097cd33394cb31a7a06;hb=1a23a35867ee992dcffcf3d17df2b7d41732f33b;hp=081f07bc367a57fe2bd139b4fd82ea73421b5d5d;hpb=f93dae0d03856955f9424e8b2aaf261304ca647e;p=samtools.git

Review amendments folded into this patch (the blob ids in the X-Git-Url and
"index" lines still name the unamended content):
  * bam_aux_get: skip the one-byte 'A' value, and stop echoing Z/H strings to
    stdout while skipping over them.
  * bam_aux2f / bam_aux2d / bam_aux2A / bam_aux2Z: test `s` for 0 before
    reading the type byte through it.
  * Header comments added to the new functions; hunk line counts adjusted.

diff --git a/bam_aux.c b/bam_aux.c
index 081f07b..a63e2ae 100644
--- a/bam_aux.c
+++ b/bam_aux.c
@@ -1,8 +1,57 @@
 #include <ctype.h>
 #include "bam.h"
 #include "khash.h"
-KHASH_MAP_INIT_INT(aux, uint8_t*)
+typedef char *str_p;
 KHASH_MAP_INIT_STR(s, int)
+KHASH_MAP_INIT_STR(r2l, str_p)
+
+/*
+ * Append one auxiliary field (two-byte tag, one type byte, then `len` bytes
+ * copied from `data`) to the end of b->data, growing the buffer if needed.
+ * NOTE(review): the realloc() result is still used unchecked.
+ */
+void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
+{
+	int ori_len = b->data_len;
+	b->data_len += 3 + len;
+	b->l_aux += 3 + len;
+	if (b->m_data < b->data_len) {
+		b->m_data = b->data_len;
+		kroundup32(b->m_data);
+		b->data = (uint8_t*)realloc(b->data, b->m_data);
+	}
+	b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1];
+	b->data[ori_len + 2] = type;
+	memcpy(b->data + ori_len + 3, data, len);
+}
+/*
+uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2])
+{
+	return bam_aux_get(b, tag);
+}
+*/
+/*
+ * Scan the auxiliary fields of `b` for `tag`.  Returns a pointer to the type
+ * byte of the first matching field, or 0 when no field carries that tag.
+ */
+uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
+{
+	uint8_t *s;
+	int y = tag[0]<<8 | tag[1];
+	s = bam1_aux(b);
+	while (s < b->data + b->data_len) {
+		int type, x = (int)s[0]<<8 | s[1];
+		s += 2;
+		if (x == y) return s;
+		type = toupper(*s); ++s;
+		if (type == 'C' || type == 'A') ++s; /* 'A' is a single char */
+		else if (type == 'S') s += 2;
+		else if (type == 'I' || type == 'F') s += 4;
+		else if (type == 'D') s += 8;
+		else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; } /* skip past the NUL */
+	}
+	return 0;
+}
 
 void bam_init_header_hash(bam_header_t *header)
 {
@@ -41,7 +90,7 @@ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *b
 
 	bam_init_header_hash(header);
 	h = (khash_t(s)*)header->hash;
-	
+
 	l = strlen(str);
 	p = s = (char*)malloc(l+1);
 	/* squeeze out "," */
@@ -71,90 +120,135 @@ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *b
 	free(s);
 }
 
-void bam_aux_init(bam1_t *b)
-{
-	khash_t(aux) *h;
-	uint8_t *s;
-	if (b->hash == 0) {
-		h = kh_init(aux);
-		b->hash = h;
-	} else {
-		h = (khash_t(aux)*)b->hash;
-		kh_clear(aux, h);
-	}
-	s = bam1_aux(b);
-	while (s < b->data + b->data_len) {
-		uint32_t x = (uint32_t)s[0]<<8 | s[1];
-		int ret, type;
-		khint_t k;
-		s += 2; type = toupper(*s); ++s;
-		k = kh_put(aux, h, x, &ret);
-		kh_value(h, k) = s;
-		if (type == 'C') ++s;
-		else if (type == 'S') s += 2;
-		else if (type == 'I') s += 4;
-		else if (type == 'F') s += 4;
-		else if (type == 'Z') { while (*s) putchar(*s++); ++s; }
-	}
-}
-void bam_aux_destroy(bam1_t *b)
-{
-	khash_t(aux) *h = (khash_t(aux)*)b->hash;
-	kh_destroy(aux, h);
-	b->hash = 0;
-}
-static uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2])
-{
-	uint32_t x = (uint32_t)tag[0]<<8 | tag[1];
-	khint_t k;
-	khash_t(aux) *h;
-	if (b->hash == 0) bam_aux_init(b);
-	h = (khash_t(aux)*)b->hash;
-	k = kh_get(aux, h, x);
-	if (k == kh_end(h)) return 0;
-	return kh_value(h, k);
-}
-int32_t bam_aux_geti(bam1_t *b, const char tag[2], int *err)
+/*
+ * Decode an integer auxiliary value; `s` points at the type byte.  Returns 0
+ * when `s` is 0 or the type is not one of c, C, s, S, i, I.
+ */
+int32_t bam_aux2i(const uint8_t *s)
 {
 	int type;
-	uint8_t *s = bam_aux_get_core(b, tag);
-	*err = 0;
-	if (s == 0) { *err = -1; return 0; }
+	if (s == 0) return 0;
 	type = *s++;
 	if (type == 'c') return (int32_t)*(int8_t*)s;
 	else if (type == 'C') return (int32_t)*(uint8_t*)s;
 	else if (type == 's') return (int32_t)*(int16_t*)s;
 	else if (type == 'S') return (int32_t)*(uint16_t*)s;
 	else if (type == 'i' || type == 'I') return *(int32_t*)s;
-	else { *err = -2; return 0; }
+	else return 0;
 }
-float bam_aux_getf(bam1_t *b, const char tag[2], int *err)
+
+/* Decode an 'f' (float) value; returns 0.0 when `s` is 0 or the type differs. */
+float bam_aux2f(const uint8_t *s)
 {
 	int type;
-	uint8_t *s = bam_aux_get_core(b, tag);
-	*err = 0;
-	type = *s++;
-	if (s == 0) { *err = -1; return 0; }
+	if (s == 0) return 0.0;
+	type = *s++;
 	if (type == 'f') return *(float*)s;
-	else { *err = -2; return 0; }
+	else return 0.0;
 }
-char bam_aux_getc(bam1_t *b, const char tag[2], int *err)
+
+/* Decode a 'd' (double) value; returns 0.0 when `s` is 0 or the type differs. */
+double bam_aux2d(const uint8_t *s)
 {
 	int type;
-	uint8_t *s = bam_aux_get_core(b, tag);
-	*err = 0;
-	type = *s++;
-	if (s == 0) { *err = -1; return 0; }
-	if (type == 'c') return *(char*)s;
-	else { *err = -2; return 0; }
+	if (s == 0) return 0.0;
+	type = *s++;
+	if (type == 'd') return *(double*)s;
+	else return 0.0;
 }
-char *bam_aux_getZH(bam1_t *b, const char tag[2], int *err)
+
+/* Decode an 'A' (single char) value; returns 0 when `s` is 0 or the type differs. */
+char bam_aux2A(const uint8_t *s)
+{
+	int type;
+	if (s == 0) return 0;
+	type = *s++;
+	if (type == 'A') return *(char*)s;
+	else return 0;
+}
+
+/* Return the 'Z'/'H' string in place (no copy), or 0 when `s` is 0 or the type differs. */
+char *bam_aux2Z(const uint8_t *s)
 {
 	int type;
-	uint8_t *s = bam_aux_get_core(b, tag);
-	*err = 0;
-	type = *s++;
-	if (s == 0) { *err = -1; return 0; }
+	if (s == 0) return 0;
+	type = *s++;
 	if (type == 'Z' || type == 'H') return (char*)s;
-	else { *err = -2; return 0; }
+	else return 0;
+}
+
+/******************
+ * rg2lib related *
+ ******************/
+
+/*
+ * Map `rg` to `lib`, storing private copies of both strings.  Returns 1 when
+ * `rg2lib` is 0, otherwise 0; a duplicated key is reported on stderr and the
+ * existing entry is kept.
+ */
+int bam_strmap_put(void *rg2lib, const char *rg, const char *lib)
+{
+	int ret;
+	khint_t k;
+	khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
+	char *key;
+	if (h == 0) return 1;
+	key = strdup(rg);
+	k = kh_put(r2l, h, key, &ret);
+	if (ret) kh_val(h, k) = strdup(lib);
+	else {
+		fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg);
+		free(key);
+	}
+	return 0;
+}
+
+/* Look up `rg`; returns the mapped string, or 0 when absent or `rg2lib` is 0. */
+const char *bam_strmap_get(const void *rg2lib, const char *rg)
+{
+	const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
+	khint_t k;
+	if (h == 0) return 0;
+	k = kh_get(r2l, h, rg);
+	if (k != kh_end(h)) return (const char*)kh_val(h, k);
+	else return 0;
+}
+
+/* Deep-copy the map (keys and values are strdup'ed); returns 0 when `rg2lib` is 0. */
+void *bam_strmap_dup(const void *rg2lib)
+{
+	const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
+	khash_t(r2l) *g;
+	khint_t k, l;
+	int ret;
+	if (h == 0) return 0;
+	g = kh_init(r2l);
+	for (k = kh_begin(h); k < kh_end(h); ++k) {
+		if (kh_exist(h, k)) {
+			char *key = strdup(kh_key(h, k));
+			l = kh_put(r2l, g, key, &ret);
+			kh_val(g, l) = strdup(kh_val(h, k));
+		}
+	}
+	return g;
+}
+
+/* Create an empty map. */
+void *bam_strmap_init()
+{
+	return (void*)kh_init(r2l);
+}
+
+/* Free every key and value string, then the table itself; a 0 argument is a no-op. */
+void bam_strmap_destroy(void *rg2lib)
+{
+	khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
+	khint_t k;
+	if (h == 0) return;
+	for (k = kh_begin(h); k < kh_end(h); ++k) {
+		if (kh_exist(h, k)) {
+			free((char*)kh_key(h, k)); free(kh_val(h, k));
+		}
+	}
+	kh_destroy(r2l, h);
 }