From: Heng Li Date: Fri, 24 Apr 2009 11:08:20 +0000 (+0000) Subject: * samtools-0.1.3-2 (r239) X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=7bd63d5c2677364126702d76c56e4d75981c62f4 * samtools-0.1.3-2 (r239) * fixed bugs in bam_aux.c (these functions were never used by samtools) * removed bam_aux_init()/bam_aux_destroy() * added tagview for testing bam_aux --- diff --git a/bam.h b/bam.h index 76f3e0f..c0bde4b 100644 --- a/bam.h +++ b/bam.h @@ -401,8 +401,8 @@ extern "C" { @abstract Free the memory allocated for an alignment. @param b pointer to an alignment */ -#define bam_destroy1(b) do { \ - if ((b)->hash) bam_aux_destroy(b); free((b)->data); free(b); \ +#define bam_destroy1(b) do { \ + if ((b)->hash) free((b)->data); free(b); \ } while (0) /*! @@ -600,11 +600,12 @@ extern "C" { */ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end); - int32_t bam_aux_geti(bam1_t *b, const char tag[2], int *err); - float bam_aux_getf(bam1_t *b, const char tag[2], int *err); - char bam_aux_getc(bam1_t *b, const char tag[2], int *err); - char *bam_aux_getZH(bam1_t *b, const char tag[2], int *err); - void bam_aux_destroy(bam1_t *b); + uint8_t *bam_aux_get(bam1_t *b, const char tag[2]); + int32_t bam_aux2i(const uint8_t *s); + float bam_aux2f(const uint8_t *s); + double bam_aux2d(const uint8_t *s); + char bam_aux2A(const uint8_t *s); + char *bam_aux2Z(const uint8_t *s); /*! 
@abstract Calculate the rightmost coordinate of an alignment on the diff --git a/bam_aux.c b/bam_aux.c index 081f07b..f9be398 100644 --- a/bam_aux.c +++ b/bam_aux.c @@ -1,9 +1,27 @@ #include #include "bam.h" #include "khash.h" -KHASH_MAP_INIT_INT(aux, uint8_t*) KHASH_MAP_INIT_STR(s, int) +uint8_t *bam_aux_get(bam1_t *b, const char tag[2]) +{ + uint8_t *s; + int y = tag[0]<<8 | tag[1]; + s = bam1_aux(b); + while (s < b->data + b->data_len) { + int type, x = (int)s[0]<<8 | s[1]; + s += 2; + if (x == y) return s; + type = toupper(*s); ++s; + if (type == 'C') ++s; + else if (type == 'S') s += 2; + else if (type == 'I' || type == 'F') s += 4; + else if (type == 'D') s += 8; + else if (type == 'Z' || type == 'H') { while (*s) putchar(*s++); ++s; } + } + return 0; +} + void bam_init_header_hash(bam_header_t *header) { if (header->hash == 0) { @@ -71,90 +89,51 @@ void bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *b free(s); } -void bam_aux_init(bam1_t *b) -{ - khash_t(aux) *h; - uint8_t *s; - if (b->hash == 0) { - h = kh_init(aux); - b->hash = h; - } else { - h = (khash_t(aux)*)b->hash; - kh_clear(aux, h); - } - s = bam1_aux(b); - while (s < b->data + b->data_len) { - uint32_t x = (uint32_t)s[0]<<8 | s[1]; - int ret, type; - khint_t k; - s += 2; type = toupper(*s); ++s; - k = kh_put(aux, h, x, &ret); - kh_value(h, k) = s; - if (type == 'C') ++s; - else if (type == 'S') s += 2; - else if (type == 'I') s += 4; - else if (type == 'F') s += 4; - else if (type == 'Z') { while (*s) putchar(*s++); ++s; } - } -} -void bam_aux_destroy(bam1_t *b) -{ - khash_t(aux) *h = (khash_t(aux)*)b->hash; - kh_destroy(aux, h); - b->hash = 0; -} -static uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) -{ - uint32_t x = (uint32_t)tag[0]<<8 | tag[1]; - khint_t k; - khash_t(aux) *h; - if (b->hash == 0) bam_aux_init(b); - h = (khash_t(aux)*)b->hash; - k = kh_get(aux, h, x); - if (k == kh_end(h)) return 0; - return kh_value(h, k); -} -int32_t bam_aux_geti(bam1_t *b, 
const char tag[2], int *err) +int32_t bam_aux2i(const uint8_t *s) { int type; - uint8_t *s = bam_aux_get_core(b, tag); - *err = 0; - if (s == 0) { *err = -1; return 0; } + if (s == 0) return 0; type = *s++; if (type == 'c') return (int32_t)*(int8_t*)s; else if (type == 'C') return (int32_t)*(uint8_t*)s; else if (type == 's') return (int32_t)*(int16_t*)s; else if (type == 'S') return (int32_t)*(uint16_t*)s; else if (type == 'i' || type == 'I') return *(int32_t*)s; - else { *err = -2; return 0; } + else return 0; } -float bam_aux_getf(bam1_t *b, const char tag[2], int *err) + +float bam_aux2f(const uint8_t *s) { int type; - uint8_t *s = bam_aux_get_core(b, tag); - *err = 0; type = *s++; - if (s == 0) { *err = -1; return 0; } + if (s == 0) return 0.0; if (type == 'f') return *(float*)s; - else { *err = -2; return 0; } + else return 0.0; } -char bam_aux_getc(bam1_t *b, const char tag[2], int *err) + +double bam_aux2d(const uint8_t *s) { int type; - uint8_t *s = bam_aux_get_core(b, tag); - *err = 0; type = *s++; - if (s == 0) { *err = -1; return 0; } - if (type == 'c') return *(char*)s; - else { *err = -2; return 0; } + if (s == 0) return 0.0; + if (type == 'd') return *(double*)s; + else return 0.0; } -char *bam_aux_getZH(bam1_t *b, const char tag[2], int *err) + +char bam_aux2A(const uint8_t *s) +{ + int type; + type = *s++; + if (s == 0) return 0; + if (type == 'A') return *(char*)s; + else return 0; +} + +char *bam_aux2Z(const uint8_t *s) { int type; - uint8_t *s = bam_aux_get_core(b, tag); - *err = 0; type = *s++; - if (s == 0) { *err = -1; return 0; } + if (s == 0) return 0; if (type == 'Z' || type == 'H') return (char*)s; - else { *err = -2; return 0; } + else return 0; } diff --git a/bamtk.c b/bamtk.c index a19abc4..f12f900 100644 --- a/bamtk.c +++ b/bamtk.c @@ -3,7 +3,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.3-1" +#define PACKAGE_VERSION "0.1.3-2" #endif int bam_taf2baf(int argc, char *argv[]); @@ -101,6 +101,46 @@ int 
bam_view(int argc, char *argv[]) return 0; } +int bam_tagview(int argc, char *argv[]) +{ + bamFile fp; + bam_header_t *header; + bam1_t *b; + char tag[2]; + int ret; + if (argc < 3) { + fprintf(stderr, "Usage: samtools tagview \n"); + return 1; + } + fp = strcmp(argv[1], "-")? bam_open(argv[1], "r") : bam_dopen(fileno(stdin), "r"); + assert(fp); + header = bam_header_read(fp); + if (header == 0) { + fprintf(stderr, "[bam_view] fail to read the BAM header. Abort!\n"); + return 1; + } + tag[0] = argv[2][0]; tag[1] = argv[2][1]; + b = (bam1_t*)calloc(1, sizeof(bam1_t)); + while ((ret = bam_read1(fp, b)) >= 0) { + uint8_t *d = bam_aux_get(b, tag); + if (d) { + printf("%s\t%d\t", bam1_qname(b), b->core.flag); + if (d[0] == 'Z' || d[0] == 'H') printf("%s\n", bam_aux2Z(d)); + else if (d[0] == 'f') printf("%f\n", bam_aux2f(d)); + else if (d[0] == 'd') printf("%lf\n", bam_aux2d(d)); + else if (d[0] == 'A') printf("%c\n", bam_aux2A(d)); + else if (d[0] == 'c' || d[0] == 's' || d[0] == 'i') printf("%d\n", bam_aux2i(d)); + else if (d[0] == 'C' || d[0] == 'S' || d[0] == 'I') printf("%u\n", bam_aux2i(d)); + else printf("\n"); + } + } + if (ret < -1) fprintf(stderr, "[bam_view] truncated file? Continue anyway. (%d)\n", ret); + free(b->data); free(b); + bam_header_destroy(header); + bam_close(fp); + return 0; +} + static int usage() { fprintf(stderr, "\n"); @@ -139,6 +179,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1); else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1); else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1); + else if (strcmp(argv[1], "tagview") == 0) return bam_tagview(argc-1, argv+1); #ifndef _NO_CURSES else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1); #endif