X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=glf.c;h=8d5346ae70af825afeeefaa2d50f276749d5301f;hb=8cab5e6afd245a6dbbb1580dc37cd3ee36e55078;hp=302f9ef3bf06864ceba7b258ae3ccdd25f21c8be;hpb=635998cfe030da5f3dbec42a6daa3ca82fa5c871;p=samtools.git diff --git a/glf.c b/glf.c index 302f9ef..8d5346a 100644 --- a/glf.c +++ b/glf.c @@ -6,32 +6,44 @@ // then alias bgzf_*() functions #endif +static int glf3_is_BE = 0; + +static inline uint32_t bam_swap_endian_4(uint32_t v) +{ + v = ((v & 0x0000FFFFU) << 16) | (v >> 16); + return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); +} + +static inline uint16_t bam_swap_endian_2(uint16_t v) +{ + return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); +} + static inline int bam_is_big_endian() { long one= 1; return !(*((char *)(&one))); } -glf_header_t *glf_header_init() +glf3_header_t *glf3_header_init() { - return (glf_header_t*)calloc(1, sizeof(glf_header_t)); + glf3_is_BE = bam_is_big_endian(); + return (glf3_header_t*)calloc(1, sizeof(glf3_header_t)); } -glf_header_t *glf_header_read(glfFile fp) +glf3_header_t *glf3_header_read(glfFile fp) { - glf_header_t *h; + glf3_header_t *h; char magic[4]; - if (bam_is_big_endian()) { - fprintf(stderr, "[glf_header_read] Big endian is detected. Abort.\n"); - exit(1); - } - h = (glf_header_t*)calloc(1, sizeof(glf_header_t)); + h = glf3_header_init(); bgzf_read(fp, magic, 4); - if (strncmp(magic, "GLF\2", 4)) { - fprintf(stderr, "[glf_header_read] invalid magic. Abort.\n"); - exit(1); + if (strncmp(magic, "GLF\3", 4)) { + fprintf(stderr, "[glf3_header_read] invalid magic.\n"); + glf3_header_destroy(h); + return 0; } bgzf_read(fp, &h->l_text, 4); + if (glf3_is_BE) h->l_text = bam_swap_endian_4(h->l_text); if (h->l_text) { h->text = (uint8_t*)calloc(h->l_text + 1, 1); bgzf_read(fp, h->text, h->l_text); @@ -39,99 +51,167 @@ glf_header_t *glf_header_read(glfFile fp) return h; } -void glf_header_write(glfFile fp, const glf_header_t *h) +void glf3_header_write(glfFile fp, const glf3_header_t *h) { - bgzf_write(fp, "GLF\2", 4); - bgzf_write(fp, &h->l_text, 4); + int32_t x; + bgzf_write(fp, "GLF\3", 4); + x = glf3_is_BE? bam_swap_endian_4(h->l_text) : h->l_text; + bgzf_write(fp, &x, 4); if (h->l_text) bgzf_write(fp, h->text, h->l_text); } -void glf_header_destroy(glf_header_t *h) +void glf3_header_destroy(glf3_header_t *h) { free(h->text); free(h); } -char *glf_ref_read(glfFile fp) +char *glf3_ref_read(glfFile fp, int *len) { - int32_t n; + int32_t n, x; char *str; + *len = 0; if (bgzf_read(fp, &n, 4) != 4) return 0; + if (glf3_is_BE) n = bam_swap_endian_4(n); if (n < 0) { - fprintf(stderr, "[glf_ref_read] invalid reference name length: %d.\n", n); + fprintf(stderr, "[glf3_ref_read] invalid reference name length: %d.\n", n); return 0; } str = (char*)calloc(n + 1, 1); // not necesarily n+1 in fact - bgzf_read(fp, str, n); + x = bgzf_read(fp, str, n); + x += bgzf_read(fp, len, 4); + if (x != n + 4) { + free(str); *len = -1; return 0; // truncated + } + if (glf3_is_BE) *len = bam_swap_endian_4(*len); return str; } -void glf_ref_write(glfFile fp, const char *str) +void glf3_ref_write(glfFile fp, const char *str, int len) { - int32_t n = strlen(str); - ++n; - bgzf_write(fp, &n, 4); + int32_t m, n = strlen(str) + 1; + m = glf3_is_BE? bam_swap_endian_4(n) : n; + bgzf_write(fp, &m, 4); bgzf_write(fp, str, n); + if (glf3_is_BE) len = bam_swap_endian_4(len); + bgzf_write(fp, &len, 4); } -void glf_view_normal(const char *ref_name, glf2_t *g1) +void glf3_view1(const char *ref_name, const glf3_t *g3, int pos) { int j; - printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, g1->pos + 1, "XACMGRSVTWYHKDBN"[g1->ref_base], - g1->depth, g1->max_mapQ, g1->min_lk); - for (j = 0; j != 10; ++j) printf("\t%d", g1->lk[j]); + if (g3->rtype == GLF3_RTYPE_END) return; + printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1, + g3->rtype == GLF3_RTYPE_INDEL? '*' : "XACMGRSVTWYHKDBN"[g3->ref_base], + g3->depth, g3->rms_mapQ, g3->min_lk); + if (g3->rtype == GLF3_RTYPE_SUB) + for (j = 0; j != 10; ++j) printf("\t%d", g3->lk[j]); + else { + printf("\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t", g3->lk[0], g3->lk[1], g3->lk[2], g3->indel_len[0], g3->indel_len[1], + g3->indel_len[0]? g3->indel_seq[0] : "*", g3->indel_len[1]? g3->indel_seq[1] : "*"); + } printf("\n"); } -static char *glf_read_indel(glfFile fp, char *str, int *max, int16_t indel) +int glf3_write1(glfFile fp, const glf3_t *g3) { - int l = indel > 0? indel : -indel; - if (l + 1 > *max) { - *max = l + 1; - str = (char*)realloc(str, *max); + int r; + uint8_t c; + uint32_t y[2]; + c = g3->rtype<<4 | g3->ref_base; + r = bgzf_write(fp, &c, 1); + if (g3->rtype == GLF3_RTYPE_END) return r; + y[0] = g3->offset; + y[1] = g3->min_lk<<24 | g3->depth; + if (glf3_is_BE) { + y[0] = bam_swap_endian_4(y[0]); + y[1] = bam_swap_endian_4(y[1]); } - bgzf_read(fp, str, l); - str[l] = 0; - return str; + r += bgzf_write(fp, y, 8); + r += bgzf_write(fp, &g3->rms_mapQ, 1); + if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_write(fp, g3->lk, 10); + else { + int16_t x[2]; + r += bgzf_write(fp, g3->lk, 3); + x[0] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[0]) : g3->indel_len[0]; + x[1] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[1]) : g3->indel_len[1]; + r += bgzf_write(fp, x, 4); + if (g3->indel_len[0]) r += bgzf_write(fp, g3->indel_seq[0], abs(g3->indel_len[0])); + if (g3->indel_len[1]) r += bgzf_write(fp, g3->indel_seq[1], abs(g3->indel_len[1])); + } + return r; +} + +#ifndef kv_roundup32 +#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +int glf3_read1(glfFile fp, glf3_t *g3) +{ + int r; + uint8_t c; + uint32_t y[2]; + r = bgzf_read(fp, &c, 1); + if (r == 0) return 0; + g3->ref_base = c & 0xf; + g3->rtype = c>>4; + if (g3->rtype == GLF3_RTYPE_END) return r; + r += bgzf_read(fp, y, 8); + if (glf3_is_BE) { + y[0] = bam_swap_endian_4(y[0]); + y[1] = bam_swap_endian_4(y[1]); + } + g3->offset = y[0]; + g3->min_lk = y[1]>>24; + g3->depth = y[1]<<8>>8; + r += bgzf_read(fp, &g3->rms_mapQ, 1); + if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_read(fp, g3->lk, 10); + else { + int16_t x[2], max; + r += bgzf_read(fp, g3->lk, 3); + r += bgzf_read(fp, x, 4); + if (glf3_is_BE) { + x[0] = bam_swap_endian_2(x[0]); + x[1] = bam_swap_endian_2(x[1]); + } + g3->indel_len[0] = x[0]; + g3->indel_len[1] = x[1]; + x[0] = abs(x[0]); x[1] = abs(x[1]); + max = (x[0] > x[1]? x[0] : x[1]) + 1; + if (g3->max_len < max) { + g3->max_len = max; + kv_roundup32(g3->max_len); + g3->indel_seq[0] = (char*)realloc(g3->indel_seq[0], g3->max_len); + g3->indel_seq[1] = (char*)realloc(g3->indel_seq[1], g3->max_len); + } + r += bgzf_read(fp, g3->indel_seq[0], x[0]); + r += bgzf_read(fp, g3->indel_seq[1], x[1]); + g3->indel_seq[0][x[0]] = g3->indel_seq[1][x[1]] = 0; + } + return r; } -void glf_view(glfFile fp) +void glf3_view(glfFile fp) { - glf_header_t *h; - char *name, *str; - glf2_t g2; - int max; - - h = glf_header_read(fp); - str = 0; max = 0; - while ((name = glf_ref_read(fp)) != 0) { - while (bgzf_read(fp, &g2, sizeof(glf2_t))) { - if (g2.type == GLF_TYPE_END) break; - else if (g2.type == GLF_TYPE_NORMAL) glf_view_normal(name, &g2); - else if (g2.type == GLF_TYPE_INDEL) { - int16_t indel1, indel2; - printf("%s\t%d\t*\t%d\t%d\t%d\t", name, g2.pos + 1, g2.depth, g2.max_mapQ, g2.min_lk); - printf("%d\t%d\t%d\t", g2.lk[0], g2.lk[1], g2.lk[2]); - indel1 = *(int16_t*)(g2.lk + 3); - indel2 = *(int16_t*)(g2.lk + 5); - printf("%d\t%d\t", indel1, indel2); - if (indel1) { - str = glf_read_indel(fp, str, &max, indel1); - printf("%c%d%s\t", indel1>0? '+':'-', indel1>0?indel1:-indel1, str); - } else printf("*\t"); - if (indel2) { - str = glf_read_indel(fp, str, &max, indel2); - printf("%c%d%s\n", indel2>0? '+':'-', indel2>0?indel2:-indel2, str); - } else printf("*\n"); - } + glf3_header_t *h; + char *name; + glf3_t *g3; + int len; + h = glf3_header_read(fp); + g3 = glf3_init1(); + while ((name = glf3_ref_read(fp, &len)) != 0) { + int pos = 0; + while (glf3_read1(fp, g3) && g3->rtype != GLF3_RTYPE_END) { + pos += g3->offset; + glf3_view1(name, g3, pos); } free(name); } - glf_header_destroy(h); - free(str); + glf3_header_destroy(h); + glf3_destroy1(g3); } -int glf_view_main(int argc, char *argv[]) +int glf3_view_main(int argc, char *argv[]) { glfFile fp; if (argc == 1) { @@ -143,7 +223,7 @@ int glf_view_main(int argc, char *argv[]) fprintf(stderr, "Fail to open file '%s'\n", argv[1]); return 1; } - glf_view(fp); + glf3_view(fp); bgzf_close(fp); return 0; } @@ -151,6 +231,6 @@ int glf_view_main(int argc, char *argv[]) #ifdef GLFVIEW_MAIN int main(int argc, char *argv[]) { - return glf_view_main(argc, argv); + return glf3_view_main(argc, argv); } #endif