X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bam_aux.c;h=89e99f281adb652e144eea0a77074a1c6cec023a;hb=d0e30eec1158752010659982342a611fc91ae8e3;hp=41be81910d89478dc160fada8fe2b4c38f8052c5;hpb=11b0656ab8ca22edb2fb7fb54fe761ea4c9e05e3;p=samtools.git diff --git a/bam_aux.c b/bam_aux.c index 41be819..89e99f2 100644 --- a/bam_aux.c +++ b/bam_aux.c @@ -1,7 +1,9 @@ #include #include "bam.h" #include "khash.h" +typedef char *str_p; KHASH_MAP_INIT_STR(s, int) +KHASH_MAP_INIT_STR(r2l, str_p) void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data) { @@ -17,30 +19,47 @@ void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *d b->data[ori_len + 2] = type; memcpy(b->data + ori_len + 3, data, len); } -/* + uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) { return bam_aux_get(b, tag); } -*/ -uint8_t *bam_aux_get(bam1_t *b, const char tag[2]) + +#define __skip_tag(s) do { \ + int type = toupper(*(s)); \ + ++(s); \ + if (type == 'C' || type == 'A') ++(s); \ + else if (type == 'S') (s) += 2; \ + else if (type == 'I' || type == 'F') (s) += 4; \ + else if (type == 'D') (s) += 8; \ + else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ + } while (0) + +uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) { uint8_t *s; int y = tag[0]<<8 | tag[1]; s = bam1_aux(b); while (s < b->data + b->data_len) { - int type, x = (int)s[0]<<8 | s[1]; + int x = (int)s[0]<<8 | s[1]; s += 2; if (x == y) return s; - type = toupper(*s); ++s; - if (type == 'C') ++s; - else if (type == 'S') s += 2; - else if (type == 'I' || type == 'F') s += 4; - else if (type == 'D') s += 8; - else if (type == 'Z' || type == 'H') { while (*s) putchar(*s++); ++s; } + __skip_tag(s); } return 0; } +// s MUST BE returned by bam_aux_get() +int bam_aux_del(bam1_t *b, uint8_t *s) +{ + uint8_t *p, *aux; + aux = bam1_aux(b); + p = s - 2; + __skip_tag(s); + memmove(p, s, b->l_aux - (s - aux)); + b->data_len -= s - p; + b->l_aux -= s - p; + return 0; +} void bam_init_header_hash(bam_header_t *header) { @@ -70,7 +89,7 @@ int32_t bam_get_tid(const bam_header_t *header, const char *seq_name) return k == kh_end(h)? -1 : kh_value(h, k); } -void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) +int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) { char *s, *p; int i, l, k; @@ -91,12 +110,12 @@ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *b iter = kh_get(s, h, s); /* get the ref_id */ if (iter == kh_end(h)) { // name not found *ref_id = -1; free(s); - return; + return -1; } *ref_id = kh_value(h, iter); if (i == k) { /* dump the whole sequence */ *begin = 0; *end = 1<<29; free(s); - return; + return -1; } for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; *begin = atoi(p); @@ -105,8 +124,12 @@ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *b *end = atoi(p); } else *end = 1<<29; if (*begin > 0) --*begin; - assert(*begin <= *end); free(s); + if (*begin > *end) { + fprintf(stderr, "[bam_parse_region] invalid region.\n"); + return -1; + } + return 0; } int32_t bam_aux2i(const uint8_t *s) @@ -157,105 +180,3 @@ char *bam_aux2Z(const uint8_t *s) if (type == 'Z' || type == 'H') return (char*)s; else return 0; } - -char bam_aux_getCSi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) i = strlen(cs) - 1 - i; - else i++; - return cs[i]; -} - -char bam_aux_getCQi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CQ"); - char *cq = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cq = bam_aux2Z(c); - // adjust for strandedness - if(bam1_strand(b)) i = strlen(cq) - 1 - i; - return cq[i]; -} - -char bam_aux_nt2int(char a) -{ - switch(toupper(a)) { - case 'A': - return 0; - break; - case 'C': - return 1; - break; - case 'G': - return 2; - break; - case 'T': - return 3; - break; - default: - return 4; - break; - } -} - -char bam_aux_ntnt2cs(char a, char b) -{ - a = bam_aux_nt2int(a); - b = bam_aux_nt2int(b); - if(4 == a || 4 == b) return '4'; - return "0123"[(int)(a ^ b)]; -} - -char bam_aux_getCEi(bam1_t *b, int i) -{ - int cs_i; - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - char prev_b, cur_b; - char cur_color, cor_color; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) { //reverse strand - cs_i = strlen(cs) - 1 - i; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == cs_i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - else { - cs_i=i+1; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - - // corrected color - cor_color = bam_aux_ntnt2cs(prev_b, cur_b); - - if(cur_color == cor_color) { - return '-'; - } - else { - return cur_color; - } -}