]> git.donarmstrong.com Git - samtools.git/commitdiff
Added the support for the new SAM/BAM type "B"
authorHeng Li <lh3@live.co.uk>
Mon, 18 Apr 2011 01:55:37 +0000 (01:55 +0000)
committerHeng Li <lh3@live.co.uk>
Mon, 18 Apr 2011 01:55:37 +0000 (01:55 +0000)
bam.c
bam.h
bam_aux.c
bam_import.c
bamtk.c
examples/toy.sam
sam_header.c

diff --git a/bam.c b/bam.c
index a65aed5d551053a50952e95d58a763a77fadd06e..5fac17df6ec27d7cbb04b5600c3ce54d5c06b354 100644 (file)
--- a/bam.c
+++ b/bam.c
@@ -160,6 +160,19 @@ static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
                else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; }
                else if (type == 'D') { bam_swap_endian_8p(s); s += 8; }
                else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; }
+               else if (type == 'B') {
+                       int32_t n, Bsize = bam_aux_type2size(*s);
+                       memcpy(&n, s + 1, 4);
+                       if (1 == Bsize) {
+                       } else if (2 == Bsize) {
+                               for (i = 0; i < n; i += 2)
+                                       bam_swap_endian_2p(s + 5 + i);
+                       } else if (4 == Bsize) {
+                               for (i = 0; i < n; i += 4)
+                                       bam_swap_endian_4p(s + 5 + i);
+                       }
+                       bam_swap_endian_4p(s+1); 
+               }
        }
 }
 
@@ -289,6 +302,23 @@ char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
                else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
                else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
                else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
+               else if (type == 'B') {
+                       uint8_t sub_type = *(s++);
+                       int32_t n;
+                       memcpy(&n, s, 4);
+                       s += 4; // no point to the start of the array
+                       kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing
+                       for (i = 0; i < n; ++i) {
+                               kputc(',', &str);
+                               if ('c' == sub_type || 'c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; }
+                               else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; }
+                               else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; }
+                               else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; }
+                               else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; }
+                               else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; }
+                               else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; }
+                       }
+               }
        }
        return str.s;
 }
diff --git a/bam.h b/bam.h
index 87e3de326fea144e10323ad8229309b6b31136a4..2de5b091a6bcbc217b244284e1d9464214c6172d 100644 (file)
--- a/bam.h
+++ b/bam.h
@@ -746,4 +746,13 @@ static inline bam1_t *bam_dup1(const bam1_t *src)
        return b;
 }
 
+static inline int bam_aux_type2size(int x)
+{
+       if (x == 'C' || x == 'c' || x == 'A') return 1;
+       else if (x == 'S' || x == 's') return 2;
+       else if (x == 'I' || x == 'i' || x == 'f') return 4;
+       else return 0;
+}
+
+
 #endif
index 4fd3190a73799c6fd68257cc8c1125a017dc4860..b6a8d88dcc9bde5e9f43f8e4d7f0ec1bd3ecc919 100644 (file)
--- a/bam_aux.c
+++ b/bam_aux.c
@@ -26,14 +26,12 @@ uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2])
 }
 
 #define __skip_tag(s) do { \
-               int type = toupper(*(s));                                                                               \
-               ++(s);                                                                                                                  \
-               if (type == 'C' || type == 'A') ++(s);                                                  \
-               else if (type == 'S') (s) += 2;                                                                 \
-               else if (type == 'I' || type == 'F') (s) += 4;                                  \
-               else if (type == 'D') (s) += 8;                                                                 \
-               else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \
-       } while (0)
+               int type = toupper(*(s)); \
+               ++(s); \
+               if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \
+               else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \
+               else (s) += bam_aux_type2size(type); \
+       } while(0)
 
 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
 {
index 9d84328bdbdafb5f4e76a19e7edf176121aca5b4..8c24692fe2d1b5c080c7eb7057b2c459f1bc9751 100644 (file)
@@ -427,6 +427,27 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
                                memcpy(s, str->s + 5, str->l - 5);
                                s[str->l - 5] = 0;
                                doff += size;
+                       } else if (type == 'B') {
+                               int32_t n = 0, Bsize, k = 0, size;
+                               char *p;
+                               if (str->l < 8) parse_error(fp->n_lines, "too few values in aux type B");
+                               Bsize = bam_aux_type2size(str->s[5]); // the size of each element
+                               for (p = (char*)str->s + 6; *p; ++p) // count the number of elements in the array
+                                       if (*p == ',') ++n;
+                               p = str->s + 7; // now p points to the first number in the array
+                               size = 6 + Bsize * n; // total number of bytes allocated to this tag
+                               s = alloc_data(b, doff + 6 * Bsize * n) + doff; // allocate memory
+                               *s++ = 'B'; *s++ = str->s[5];
+                               memcpy(s, &n, 4); s += 4; // write the number of elements
+                               if (str->s[5] == 'c')      while (p < str->s + str->l) ((int8_t*)s)[k++]   = (int8_t)strtol(p, &p, 0),   ++p;
+                               else if (str->s[5] == 'C') while (p < str->s + str->l) ((uint8_t*)s)[k++]  = (uint8_t)strtol(p, &p, 0),  ++p;
+                               else if (str->s[5] == 's') while (p < str->s + str->l) ((int16_t*)s)[k++]  = (int16_t)strtol(p, &p, 0),  ++p; // FIXME: avoid unaligned memory
+                               else if (str->s[5] == 'S') while (p < str->s + str->l) ((uint16_t*)s)[k++] = (uint16_t)strtol(p, &p, 0), ++p;
+                               else if (str->s[5] == 'i') while (p < str->s + str->l) ((int32_t*)s)[k++]  = (int32_t)strtol(p, &p, 0),  ++p;
+                               else if (str->s[5] == 'I') while (p < str->s + str->l) ((uint32_t*)s)[k++] = (uint32_t)strtol(p, &p, 0), ++p;
+                               else if (str->s[5] == 'f') while (p < str->s + str->l) ((float*)s)[k++]    = (float)strtof(p, &p),       ++p;
+                               else parse_error(fp->n_lines, "unrecognized array type");
+                               s += Bsize * n; doff += size;
                        } else parse_error(fp->n_lines, "unrecognized type");
                        if (dret == '\n' || dret == '\r') break;
                }
diff --git a/bamtk.c b/bamtk.c
index 5886570a88f9fbdc43e6442f1f76df7f9e100243..0941f6f16f9194db93b152a48e2007b63f498d54 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -31,46 +31,6 @@ int main_depth(int argc, char *argv[]);
 int faidx_main(int argc, char *argv[]);
 int glf3_view_main(int argc, char *argv[]);
 
-int bam_tagview(int argc, char *argv[])
-{
-       bamFile fp;
-       bam_header_t *header;
-       bam1_t *b;
-       char tag[2];
-       int ret;
-       if (argc < 3) {
-               fprintf(stderr, "Usage: samtools tagview <in.bam> <tag>\n");
-               return 1;
-       }
-       fp = strcmp(argv[1], "-")? bam_open(argv[1], "r") : bam_dopen(fileno(stdin), "r");
-       assert(fp);
-       header = bam_header_read(fp);
-       if (header == 0) {
-               fprintf(stderr, "[bam_view] fail to read the BAM header. Abort!\n");
-               return 1;
-       }
-       tag[0] = argv[2][0]; tag[1] = argv[2][1];
-       b = (bam1_t*)calloc(1, sizeof(bam1_t));
-       while ((ret = bam_read1(fp, b)) >= 0) {
-               uint8_t *d = bam_aux_get(b, tag);
-               if (d) {
-                       printf("%s\t%d\t", bam1_qname(b), b->core.flag);
-                       if (d[0] == 'Z' || d[0] == 'H') printf("%s\n", bam_aux2Z(d));
-                       else if (d[0] == 'f') printf("%f\n", bam_aux2f(d));
-                       else if (d[0] == 'd') printf("%lf\n", bam_aux2d(d));
-                       else if (d[0] == 'A') printf("%c\n", bam_aux2A(d));
-                       else if (d[0] == 'c' || d[0] == 's' || d[0] == 'i') printf("%d\n", bam_aux2i(d));
-                       else if (d[0] == 'C' || d[0] == 'S' || d[0] == 'I') printf("%u\n", bam_aux2i(d));
-                       else printf("\n");
-               }
-       }
-       if (ret < -1) fprintf(stderr, "[bam_view] truncated file? Continue anyway. (%d)\n", ret);
-       free(b->data); free(b);
-       bam_header_destroy(header);
-       bam_close(fp);
-       return 0;
-}
-
 static int usage()
 {
        fprintf(stderr, "\n");
@@ -131,7 +91,6 @@ int main(int argc, char *argv[])
        else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);
        else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1);
        else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);
-       else if (strcmp(argv[1], "tagview") == 0) return bam_tagview(argc-1, argv+1);
        else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);
        else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);
        else if (strcmp(argv[1], "reheader") == 0) return main_reheader(argc-1, argv+1);
index 1aff2204e4078b9adc9943a9533a2626cca887c8..33449b176bc030a032c261c5280a907cebd8e58a 100644 (file)
@@ -1,6 +1,6 @@
 @SQ    SN:ref  LN:45
 @SQ    SN:ref2 LN:40
-r001   163     ref     7       30      8M4I4M1D3M      =       37      39      TTAGATAAAGAGGATACTG     *
+r001   163     ref     7       30      8M4I4M1D3M      =       37      39      TTAGATAAAGAGGATACTG     *       XX:B:S,12561,2,20,112
 r002   0       ref     9       30      1S2I6M1P1I1P1I4M2I      *       0       0       AAAAGATAAGGGATAAA       *
 r003   0       ref     9       30      5H6M    *       0       0       AGCTAA  *
 r004   0       ref     16      30      6M14N1I5M       *       0       0       ATAGCTCTCAGC    *
index f49b2d463cff3301ee9351ec956a69d6d2c185c4..f4c8a3bb65d8c52159c16b0a92e1c4d2528e4feb 100644 (file)
@@ -38,7 +38,7 @@ const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL};
 const char *r_sq_tags[] = {"SN","LN",NULL};
 const char *u_sq_tags[] = {"SN",NULL};
 
-const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL};
+const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL};
 const char *r_rg_tags[] = {"ID",NULL};
 const char *u_rg_tags[] = {"ID",NULL};