X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_aux.c;h=a63e2aeadccf35e95b786097cd33394cb31a7a06;hb=1a23a35867ee992dcffcf3d17df2b7d41732f33b;hp=98690487a517497976e0cfe0ac40e5c0dc2d932a;hpb=f08c0abe0850773c68bd72280f6063ef6d4505f1;p=samtools.git diff --git a/bam_aux.c b/bam_aux.c index 9869048..a63e2ae 100644 --- a/bam_aux.c +++ b/bam_aux.c @@ -1,7 +1,9 @@ #include #include "bam.h" #include "khash.h" +typedef char *str_p; KHASH_MAP_INIT_STR(s, int) +KHASH_MAP_INIT_STR(r2l, str_p) void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data) { @@ -17,13 +19,13 @@ void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *d b->data[ori_len + 2] = type; memcpy(b->data + ori_len + 3, data, len); } - +/* uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) { return bam_aux_get(b, tag); } - -uint8_t *bam_aux_get(bam1_t *b, const char tag[2]) +*/ +uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) { uint8_t *s; int y = tag[0]<<8 | tag[1]; @@ -158,104 +160,69 @@ char *bam_aux2Z(const uint8_t *s) else return 0; } -char bam_aux_getCSi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; +/****************** + * rg2lib related * + ******************/ - cs = bam_aux2Z(c); - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) i = strlen(cs) - 1 - i; - else i++; - return cs[i]; +int bam_strmap_put(void *rg2lib, const char *rg, const char *lib) +{ + int ret; + khint_t k; + khash_t(r2l) *h = (khash_t(r2l)*)rg2lib; + char *key; + if (h == 0) return 1; + key = strdup(rg); + k = kh_put(r2l, h, key, &ret); + if (ret) kh_val(h, k) = strdup(lib); + else { + fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg); + free(key); + } + return 0; } -char bam_aux_getCQi(bam1_t *b, int i) +const char *bam_strmap_get(const void *rg2lib, const char *rg) { - uint8_t *c = bam_aux_get(b, "CQ"); - char *cq = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cq = bam_aux2Z(c); - // adjust for strandedness - if(bam1_strand(b)) i = strlen(cq) - 1 - i; - return cq[i]; + const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib; + khint_t k; + if (h == 0) return 0; + k = kh_get(r2l, h, rg); + if (k != kh_end(h)) return (const char*)kh_val(h, k); + else return 0; } -char bam_aux_nt2int(char a) +void *bam_strmap_dup(const void *rg2lib) { - switch(toupper(a)) { - case 'A': - return 0; - break; - case 'C': - return 1; - break; - case 'G': - return 2; - break; - case 'T': - return 3; - break; - default: - return 4; - break; + const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib; + khash_t(r2l) *g; + khint_t k, l; + int ret; + if (h == 0) return 0; + g = kh_init(r2l); + for (k = kh_begin(h); k < kh_end(h); ++k) { + if (kh_exist(h, k)) { + char *key = strdup(kh_key(h, k)); + l = kh_put(r2l, g, key, &ret); + kh_val(g, l) = strdup(kh_val(h, k)); + } } + return g; } -char bam_aux_ntnt2cs(char a, char b) +void *bam_strmap_init() { - a = bam_aux_nt2int(a); - b = bam_aux_nt2int(b); - if(4 == a || 4 == b) return '4'; - return "0123"[(int)(a ^ b)]; + return (void*)kh_init(r2l); } -char bam_aux_getCEi(bam1_t *b, int i) +void bam_strmap_destroy(void *rg2lib) { - int cs_i; - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - char prev_b, cur_b; - char cur_color, cor_color; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) { //reverse strand - cs_i = strlen(cs) - 1 - i; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == cs_i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - else { - cs_i=i+1; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - - // corrected color - cor_color = bam_aux_ntnt2cs(prev_b, cur_b); - - if(cur_color == cor_color) { - return '-'; - } - else { - return cur_color; + khash_t(r2l) *h = (khash_t(r2l)*)rg2lib; + khint_t k; + if (h == 0) return; + for (k = kh_begin(h); k < kh_end(h); ++k) { + if (kh_exist(h, k)) { + free((char*)kh_key(h, k)); free(kh_val(h, k)); + } } + kh_destroy(r2l, h); }