else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; }
else if (type == 'D') { bam_swap_endian_8p(s); s += 8; }
else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; }
+ else if (type == 'B') {
+ int32_t n, Bsize = bam_aux_type2size(*s);
+ memcpy(&n, s + 1, 4);
+ if (1 == Bsize) {
+ } else if (2 == Bsize) {
+ for (i = 0; i < n; i += 2)
+ bam_swap_endian_2p(s + 5 + i);
+ } else if (4 == Bsize) {
+ for (i = 0; i < n; i += 4)
+ bam_swap_endian_4p(s + 5 + i);
+ }
+ bam_swap_endian_4p(s+1);
+ }
}
}
else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
+ else if (type == 'B') {
+ uint8_t sub_type = *(s++);
+ int32_t n;
+ memcpy(&n, s, 4);
+ s += 4; // no point to the start of the array
+ kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing
+ for (i = 0; i < n; ++i) {
+ kputc(',', &str);
+ if ('c' == sub_type || 'c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; }
+ else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; }
+ else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; }
+ else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; }
+ else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; }
+ else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; }
+ else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; }
+ }
+ }
}
return str.s;
}
return b;
}
+static inline int bam_aux_type2size(int x)
+{
+ if (x == 'C' || x == 'c' || x == 'A') return 1;
+ else if (x == 'S' || x == 's') return 2;
+ else if (x == 'I' || x == 'i' || x == 'f') return 4;
+ else return 0;
+}
+
+
#endif
}
#define __skip_tag(s) do { \
- int type = toupper(*(s)); \
- ++(s); \
- if (type == 'C' || type == 'A') ++(s); \
- else if (type == 'S') (s) += 2; \
- else if (type == 'I' || type == 'F') (s) += 4; \
- else if (type == 'D') (s) += 8; \
- else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \
- } while (0)
+ int type = toupper(*(s)); \
+ ++(s); \
+ if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \
+ else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \
+ else (s) += bam_aux_type2size(type); \
+ } while(0)
uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
{
memcpy(s, str->s + 5, str->l - 5);
s[str->l - 5] = 0;
doff += size;
+ } else if (type == 'B') {
+ int32_t n = 0, Bsize, k = 0, size;
+ char *p;
+ if (str->l < 8) parse_error(fp->n_lines, "too few values in aux type B");
+ Bsize = bam_aux_type2size(str->s[5]); // the size of each element
+ for (p = (char*)str->s + 6; *p; ++p) // count the number of elements in the array
+ if (*p == ',') ++n;
+ p = str->s + 7; // now p points to the first number in the array
+ size = 6 + Bsize * n; // total number of bytes allocated to this tag
+ s = alloc_data(b, doff + 6 * Bsize * n) + doff; // allocate memory
+ *s++ = 'B'; *s++ = str->s[5];
+ memcpy(s, &n, 4); s += 4; // write the number of elements
+ if (str->s[5] == 'c') while (p < str->s + str->l) ((int8_t*)s)[k++] = (int8_t)strtol(p, &p, 0), ++p;
+ else if (str->s[5] == 'C') while (p < str->s + str->l) ((uint8_t*)s)[k++] = (uint8_t)strtol(p, &p, 0), ++p;
+ else if (str->s[5] == 's') while (p < str->s + str->l) ((int16_t*)s)[k++] = (int16_t)strtol(p, &p, 0), ++p; // FIXME: avoid unaligned memory
+ else if (str->s[5] == 'S') while (p < str->s + str->l) ((uint16_t*)s)[k++] = (uint16_t)strtol(p, &p, 0), ++p;
+ else if (str->s[5] == 'i') while (p < str->s + str->l) ((int32_t*)s)[k++] = (int32_t)strtol(p, &p, 0), ++p;
+ else if (str->s[5] == 'I') while (p < str->s + str->l) ((uint32_t*)s)[k++] = (uint32_t)strtol(p, &p, 0), ++p;
+ else if (str->s[5] == 'f') while (p < str->s + str->l) ((float*)s)[k++] = (float)strtof(p, &p), ++p;
+ else parse_error(fp->n_lines, "unrecognized array type");
+ s += Bsize * n; doff += size;
} else parse_error(fp->n_lines, "unrecognized type");
if (dret == '\n' || dret == '\r') break;
}
int faidx_main(int argc, char *argv[]);
int glf3_view_main(int argc, char *argv[]);
-int bam_tagview(int argc, char *argv[])
-{
- bamFile fp;
- bam_header_t *header;
- bam1_t *b;
- char tag[2];
- int ret;
- if (argc < 3) {
- fprintf(stderr, "Usage: samtools tagview <in.bam> <tag>\n");
- return 1;
- }
- fp = strcmp(argv[1], "-")? bam_open(argv[1], "r") : bam_dopen(fileno(stdin), "r");
- assert(fp);
- header = bam_header_read(fp);
- if (header == 0) {
- fprintf(stderr, "[bam_view] fail to read the BAM header. Abort!\n");
- return 1;
- }
- tag[0] = argv[2][0]; tag[1] = argv[2][1];
- b = (bam1_t*)calloc(1, sizeof(bam1_t));
- while ((ret = bam_read1(fp, b)) >= 0) {
- uint8_t *d = bam_aux_get(b, tag);
- if (d) {
- printf("%s\t%d\t", bam1_qname(b), b->core.flag);
- if (d[0] == 'Z' || d[0] == 'H') printf("%s\n", bam_aux2Z(d));
- else if (d[0] == 'f') printf("%f\n", bam_aux2f(d));
- else if (d[0] == 'd') printf("%lf\n", bam_aux2d(d));
- else if (d[0] == 'A') printf("%c\n", bam_aux2A(d));
- else if (d[0] == 'c' || d[0] == 's' || d[0] == 'i') printf("%d\n", bam_aux2i(d));
- else if (d[0] == 'C' || d[0] == 'S' || d[0] == 'I') printf("%u\n", bam_aux2i(d));
- else printf("\n");
- }
- }
- if (ret < -1) fprintf(stderr, "[bam_view] truncated file? Continue anyway. (%d)\n", ret);
- free(b->data); free(b);
- bam_header_destroy(header);
- bam_close(fp);
- return 0;
-}
-
static int usage()
{
fprintf(stderr, "\n");
else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);
else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1);
else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);
- else if (strcmp(argv[1], "tagview") == 0) return bam_tagview(argc-1, argv+1);
else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);
else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);
else if (strcmp(argv[1], "reheader") == 0) return main_reheader(argc-1, argv+1);
@SQ SN:ref LN:45
@SQ SN:ref2 LN:40
-r001 163 ref 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG *
+r001 163 ref 7 30 8M4I4M1D3M = 37 39 TTAGATAAAGAGGATACTG * XX:B:S,12561,2,20,112
r002 0 ref 9 30 1S2I6M1P1I1P1I4M2I * 0 0 AAAAGATAAGGGATAAA *
r003 0 ref 9 30 5H6M * 0 0 AGCTAA *
r004 0 ref 16 30 6M14N1I5M * 0 0 ATAGCTCTCAGC *
const char *r_sq_tags[] = {"SN","LN",NULL};
const char *u_sq_tags[] = {"SN",NULL};
-const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL};
+const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL};
const char *r_rg_tags[] = {"ID",NULL};
const char *u_rg_tags[] = {"ID",NULL};