}
// an analogy to pileup_func() below
-static int glt_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data)
+static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data)
{
pu_data_t *d = (pu_data_t*)data;
bam_maqindel_ret_t *r = 0;
int rb;
glf1_t *g;
- glf3_t g2;
+ glf3_t *g3;
if (d->fai == 0) {
- fprintf(stderr, "[glt_func] reference sequence is required for generating GLT. Abort!\n");
+ fprintf(stderr, "[glt3_func] reference sequence is required for generating GLT. Abort!\n");
exit(1);
}
if (d->hash && kh_get(64, d->hash, (uint64_t)tid<<32|pos) == kh_end(d->hash)) return 0;
+ g3 = glf3_init1();
if (d->fai && (int)tid != d->tid) {
if (d->ref) { // then write the end mark
- memset(&g2, 0, sizeof(glf3_t));
- g2.type = GLF_TYPE_END;
- bgzf_write(d->fp, &g2, sizeof(glf3_t));
+ g3->rtype = GLF3_RTYPE_END;
+ glf3_write1(d->fp, g3);
}
- glf_ref_write(d->fp, d->h->target_name[tid]); // write reference
+ glf3_ref_write(d->fp, d->h->target_name[tid], d->h->target_len[tid]); // write reference
free(d->ref);
d->ref = fai_fetch(d->fai, d->h->target_name[tid], &d->len);
d->tid = tid;
+ d->last_pos = 0;
}
rb = (d->ref && (int)pos < d->len)? d->ref[pos] : 'N';
g = bam_maqcns_glfgen(n, pu, bam_nt16_table[rb], d->c);
- memcpy(&g2, g, sizeof(glf1_t));
- g2.type = GLF_TYPE_NORMAL;
- g2.offset = pos - d->last_pos;
+ memcpy(g3, g, sizeof(glf1_t));
+ g3->rtype = GLF3_RTYPE_SUB;
+ g3->offset = pos - d->last_pos;
d->last_pos = pos;
- bgzf_write(d->fp, &g2, sizeof(glf3_t));
+ glf3_write1(d->fp, g3);
r = bam_maqindel(n, pos, d->ido, pu, d->ref);
if (r) { // then write indel line
- int g3 = 3 * n, min;
- min = g3;
+ int het = 3 * n, min;
+ min = het;
if (min > r->gl[0]) min = r->gl[0];
if (min > r->gl[1]) min = r->gl[1];
- g2.ref_base = 0;
- g2.type = GLF_TYPE_INDEL;
- memset(g2.lk, 0, 10);
- g2.lk[0] = r->gl[0] - min < 255? r->gl[0] - min : 255;
- g2.lk[1] = r->gl[1] - min < 255? r->gl[1] - min : 255;
- g2.lk[2] = g3 - min < 255? g3 - min : 255;
- *(int16_t*)(g2.lk + 3) = (int16_t)r->indel1;
- *(int16_t*)(g2.lk + 5) = (int16_t)r->indel2;
- g2.min_lk = min < 255? min : 255;
- bgzf_write(d->fp, &g2, sizeof(glf3_t));
- if (r->indel1) bgzf_write(d->fp, r->s[0]+1, r->indel1>0? r->indel1 : -r->indel1);
- if (r->indel2) bgzf_write(d->fp, r->s[1]+1, r->indel2>0? r->indel2 : -r->indel2);
+ g3->ref_base = 0;
+ g3->rtype = GLF3_RTYPE_INDEL;
+ memset(g3->lk, 0, 10);
+ g3->lk[0] = r->gl[0] - min < 255? r->gl[0] - min : 255;
+ g3->lk[1] = r->gl[1] - min < 255? r->gl[1] - min : 255;
+ g3->lk[2] = het - min < 255? het - min : 255;
+ g3->offset = 0;
+ g3->indel_len[0] = r->indel1;
+ g3->indel_len[1] = r->indel2;
+ g3->min_lk = min < 255? min : 255;
+ g3->max_len = (abs(r->indel1) > abs(r->indel2)? abs(r->indel1) : abs(r->indel2)) + 1;
+ g3->indel_seq[0] = strdup(r->s[0]+1);
+ g3->indel_seq[1] = strdup(r->s[1]+1);
+ glf3_write1(d->fp, g3);
bam_maqindel_ret_destroy(r);
}
free(g);
+ glf3_destroy1(g3);
return 0;
}
int i, j, rb, max_mapq = 0;
uint32_t x;
if (d->hash && kh_get(64, d->hash, (uint64_t)tid<<32|pos) == kh_end(d->hash)) return 0;
- if (d->format & BAM_PLF_GLF) return glt_func(tid, pos, n, pu, data);
+ if (d->format & BAM_PLF_GLF) return glt3_func(tid, pos, n, pu, data);
if (d->fai && (int)tid != d->tid) {
free(d->ref);
d->ref = fai_fetch(d->fai, d->h->target_name[tid], &d->len);
free(fn_fa);
bam_maqcns_prepare(d->c);
if (d->format & BAM_PLF_GLF) {
- glf_header_t *h;
- h = glf_header_init();
+ glf3_header_t *h;
+ h = glf3_header_init();
d->fp = bgzf_fdopen(fileno(stdout), "w");
- glf_header_write(d->fp, h);
- glf_header_destroy(h);
+ glf3_header_write(d->fp, h);
+ glf3_header_destroy(h);
}
if (fn_list) {
tamFile fp;
// then alias bgzf_*() functions
#endif
+static int glf3_is_BE = 0;
+
+static inline uint32_t bam_swap_endian_4(uint32_t v)
+{
+ v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
+ return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
+}
+
+static inline uint16_t bam_swap_endian_2(uint16_t v)
+{
+ return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
+}
+
static inline int bam_is_big_endian()
{
long one= 1;
return !(*((char *)(&one)));
}
-glf_header_t *glf_header_init()
+glf3_header_t *glf3_header_init()
{
- return (glf_header_t*)calloc(1, sizeof(glf_header_t));
+ glf3_is_BE = bam_is_big_endian();
+ return (glf3_header_t*)calloc(1, sizeof(glf3_header_t));
}
-glf_header_t *glf_header_read(glfFile fp)
+glf3_header_t *glf3_header_read(glfFile fp)
{
- glf_header_t *h;
+ glf3_header_t *h;
char magic[4];
- if (bam_is_big_endian()) {
- fprintf(stderr, "[glf_header_read] Big endian is detected. Abort.\n");
- exit(1);
- }
- h = (glf_header_t*)calloc(1, sizeof(glf_header_t));
+ h = glf3_header_init();
bgzf_read(fp, magic, 4);
if (strncmp(magic, "GLF\3", 4)) {
- fprintf(stderr, "[glf_header_read] invalid magic. Abort.\n");
+ fprintf(stderr, "[glf3_header_read] invalid magic. Abort.\n");
exit(1);
}
bgzf_read(fp, &h->l_text, 4);
+ if (glf3_is_BE) h->l_text = bam_swap_endian_4(h->l_text);
if (h->l_text) {
h->text = (uint8_t*)calloc(h->l_text + 1, 1);
bgzf_read(fp, h->text, h->l_text);
return h;
}
-void glf_header_write(glfFile fp, const glf_header_t *h)
+void glf3_header_write(glfFile fp, const glf3_header_t *h)
{
+ int32_t x;
bgzf_write(fp, "GLF\3", 4);
- bgzf_write(fp, &h->l_text, 4);
+ x = glf3_is_BE? bam_swap_endian_4(h->l_text) : h->l_text;
+ bgzf_write(fp, &x, 4);
if (h->l_text) bgzf_write(fp, h->text, h->l_text);
}
-void glf_header_destroy(glf_header_t *h)
+void glf3_header_destroy(glf3_header_t *h)
{
free(h->text);
free(h);
}
-char *glf_ref_read(glfFile fp)
+char *glf3_ref_read(glfFile fp, int *len)
{
- int32_t n;
+ int32_t n, x;
char *str;
+ *len = 0;
if (bgzf_read(fp, &n, 4) != 4) return 0;
+ if (glf3_is_BE) n = bam_swap_endian_4(n);
if (n < 0) {
- fprintf(stderr, "[glf_ref_read] invalid reference name length: %d.\n", n);
+ fprintf(stderr, "[glf3_ref_read] invalid reference name length: %d.\n", n);
return 0;
}
str = (char*)calloc(n + 1, 1); // not necesarily n+1 in fact
- bgzf_read(fp, str, n);
+ x = bgzf_read(fp, str, n);
+ x += bgzf_read(fp, len, 4);
+ if (x != n + 4) {
+ free(str); *len = -1; return 0; // truncated
+ }
+ if (glf3_is_BE) *len = bam_swap_endian_4(*len);
return str;
}
-void glf_ref_write(glfFile fp, const char *str)
+void glf3_ref_write(glfFile fp, const char *str, int len)
{
- int32_t n = strlen(str);
- ++n;
- bgzf_write(fp, &n, 4);
+ int32_t m, n = strlen(str) + 1;
+ m = glf3_is_BE? bam_swap_endian_4(n) : n;
+ bgzf_write(fp, &m, 4);
bgzf_write(fp, str, n);
+ if (glf3_is_BE) len = bam_swap_endian_4(len);
+ bgzf_write(fp, &len, 4);
}
-void glf_view_normal(const char *ref_name, glf3_t *g1, int pos)
+void glf3_view1(const char *ref_name, const glf3_t *g3, int pos)
{
int j;
- printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1, "XACMGRSVTWYHKDBN"[g1->ref_base],
- g1->depth, g1->max_mapQ, g1->min_lk);
- for (j = 0; j != 10; ++j) printf("\t%d", g1->lk[j]);
+ if (g3->rtype == GLF3_RTYPE_END) return;
+ printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1,
+ g3->rtype == GLF3_RTYPE_INDEL? '*' : "XACMGRSVTWYHKDBN"[g3->ref_base],
+ g3->depth, g3->rms_mapQ, g3->min_lk);
+ if (g3->rtype == GLF3_RTYPE_SUB)
+ for (j = 0; j != 10; ++j) printf("\t%d", g3->lk[j]);
+ else {
+ printf("\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t", g3->lk[0], g3->lk[1], g3->lk[2], g3->indel_len[0], g3->indel_len[1],
+ g3->indel_len[0]? g3->indel_seq[0] : "*", g3->indel_len[1]? g3->indel_seq[1] : "*");
+ }
printf("\n");
}
-static char *glf_read_indel(glfFile fp, char *str, int *max, int16_t indel)
+int glf3_write1(glfFile fp, const glf3_t *g3)
{
- int l = indel > 0? indel : -indel;
- if (l + 1 > *max) {
- *max = l + 1;
- str = (char*)realloc(str, *max);
+ int r;
+ uint8_t c;
+ uint32_t y[2];
+ c = g3->rtype<<4 | g3->ref_base;
+ r = bgzf_write(fp, &c, 1);
+ if (g3->rtype == GLF3_RTYPE_END) return r;
+ y[0] = g3->offset;
+ y[1] = g3->min_lk<<24 | g3->depth;
+ if (glf3_is_BE) {
+ y[0] = bam_swap_endian_4(y[0]);
+ y[1] = bam_swap_endian_4(y[1]);
}
- bgzf_read(fp, str, l);
- str[l] = 0;
- return str;
+ r += bgzf_write(fp, y, 8);
+ r += bgzf_write(fp, &g3->rms_mapQ, 1);
+ if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_write(fp, g3->lk, 10);
+ else {
+ int16_t x[2];
+ r += bgzf_write(fp, g3->lk, 3);
+ x[0] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[0]) : g3->indel_len[0];
+ x[1] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[1]) : g3->indel_len[1];
+ r += bgzf_write(fp, x, 4);
+ if (g3->indel_len[0]) r += bgzf_write(fp, g3->indel_seq[0], abs(g3->indel_len[0]));
+ if (g3->indel_len[1]) r += bgzf_write(fp, g3->indel_seq[1], abs(g3->indel_len[1]));
+ }
+ return r;
+}
+
+#ifndef kv_roundup32
+#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+int glf3_read1(glfFile fp, glf3_t *g3)
+{
+ int r;
+ uint8_t c;
+ uint32_t y[2];
+ r = bgzf_read(fp, &c, 1);
+ if (r == 0) return 0;
+ g3->ref_base = c & 0xf;
+ g3->rtype = c>>4;
+ if (g3->rtype == GLF3_RTYPE_END) return r;
+ r += bgzf_read(fp, y, 8);
+ if (glf3_is_BE) {
+ y[0] = bam_swap_endian_4(y[0]);
+ y[1] = bam_swap_endian_4(y[1]);
+ }
+ g3->offset = y[0];
+ g3->min_lk = y[1]>>24;
+ g3->depth = y[1]<<8>>8;
+ r += bgzf_read(fp, &g3->rms_mapQ, 1);
+ if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_read(fp, g3->lk, 10);
+ else {
+ int16_t x[2], max;
+ r += bgzf_read(fp, g3->lk, 3);
+ r += bgzf_read(fp, x, 4);
+ if (glf3_is_BE) {
+ x[0] = bam_swap_endian_2(x[0]);
+ x[1] = bam_swap_endian_2(x[1]);
+ }
+ g3->indel_len[0] = x[0];
+ g3->indel_len[1] = x[1];
+ x[0] = abs(x[0]); x[1] = abs(x[1]);
+ max = (x[0] > x[1]? x[0] : x[1]) + 1;
+ if (g3->max_len < max) {
+ g3->max_len = max;
+ kv_roundup32(g3->max_len);
+ g3->indel_seq[0] = (char*)realloc(g3->indel_seq[0], g3->max_len);
+ g3->indel_seq[1] = (char*)realloc(g3->indel_seq[1], g3->max_len);
+ }
+ r += bgzf_read(fp, g3->indel_seq[0], x[0]);
+ r += bgzf_read(fp, g3->indel_seq[1], x[1]);
+ g3->indel_seq[0][x[0]] = g3->indel_seq[1][x[1]] = 0;
+ }
+ return r;
}
-void glf_view(glfFile fp)
+void glf3_view(glfFile fp)
{
- glf_header_t *h;
- char *name, *str;
- glf3_t g2;
- int max;
-
- h = glf_header_read(fp);
- str = 0; max = 0;
- while ((name = glf_ref_read(fp)) != 0) {
+ glf3_header_t *h;
+ char *name;
+ glf3_t *g3;
+ int len;
+ h = glf3_header_read(fp);
+ g3 = glf3_init1();
+ while ((name = glf3_ref_read(fp, &len)) != 0) {
int pos = 0;
- while (bgzf_read(fp, &g2, sizeof(glf3_t))) {
- if (g2.type == GLF_TYPE_END) break;
- pos += g2.offset;
- if (g2.type == GLF_TYPE_NORMAL) glf_view_normal(name, &g2, pos);
- else if (g2.type == GLF_TYPE_INDEL) {
- int16_t indel1, indel2;
- printf("%s\t%d\t*\t%d\t%d\t%d\t", name, pos + 1, g2.depth, g2.max_mapQ, g2.min_lk);
- printf("%d\t%d\t%d\t", g2.lk[0], g2.lk[1], g2.lk[2]);
- indel1 = *(int16_t*)(g2.lk + 3);
- indel2 = *(int16_t*)(g2.lk + 5);
- printf("%d\t%d\t", indel1, indel2);
- if (indel1) {
- str = glf_read_indel(fp, str, &max, indel1);
- printf("%c%d%s\t", indel1>0? '+':'-', indel1>0?indel1:-indel1, str);
- } else printf("*\t");
- if (indel2) {
- str = glf_read_indel(fp, str, &max, indel2);
- printf("%c%d%s\n", indel2>0? '+':'-', indel2>0?indel2:-indel2, str);
- } else printf("*\n");
- }
+ while (glf3_read1(fp, g3) && g3->rtype != GLF3_RTYPE_END) {
+ pos += g3->offset;
+ glf3_view1(name, g3, pos);
}
free(name);
}
- glf_header_destroy(h);
- free(str);
+ glf3_header_destroy(h);
+ glf3_destroy1(g3);
}
-int glf_view_main(int argc, char *argv[])
+int glf3_view_main(int argc, char *argv[])
{
glfFile fp;
if (argc == 1) {
fprintf(stderr, "Fail to open file '%s'\n", argv[1]);
return 1;
}
- glf_view(fp);
+ glf3_view(fp);
bgzf_close(fp);
return 0;
}
#ifdef GLFVIEW_MAIN
int main(int argc, char *argv[])
{
- return glf_view_main(argc, argv);
+ return glf3_view_main(argc, argv);
}
#endif
#include "bgzf.h"
typedef BGZF *glfFile;
-#define GLF_TYPE_NORMAL 0
-#define GLF_TYPE_INDEL 1
-#define GLF_TYPE_END 15
+#define GLF3_RTYPE_END 0
+#define GLF3_RTYPE_SUB 1
+#define GLF3_RTYPE_INDEL 2
typedef struct {
- unsigned char ref_base:4, type:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
- unsigned char max_mapQ; /** maximum mapping quality */
- unsigned char lk[10]; /** log likelihood ratio, capped at 255 */
- unsigned min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */
- unsigned offset; /** the first base in a chromosome has offset zero. */
+ uint8_t ref_base:4, rtype:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
+ uint8_t rms_mapQ; /** RMS mapping quality */
+ uint8_t lk[10]; /** log likelihood ratio, capped at 255 */
+ uint32_t min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */
+ int32_t offset; /** the first base in a chromosome has offset zero. */
+ // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10])
+ int16_t indel_len[2];
+ int32_t max_len; // maximum indel len; will be modified by glf3_read1()
+ char *indel_seq[2];
} glf3_t;
typedef struct {
int32_t l_text;
uint8_t *text;
-} glf_header_t;
+} glf3_header_t;
#ifdef __cplusplus
extern "C" {
#endif
- glf_header_t *glf_header_init();
- glf_header_t *glf_header_read(glfFile fp);
- void glf_header_write(glfFile fp, const glf_header_t *h);
- void glf_header_destroy(glf_header_t *h);
- char *glf_ref_read(glfFile fp);
- void glf_ref_write(glfFile fp, const char *str);
+#define glf3_init1() ((glf3_t*)calloc(1, sizeof(glf3_t)))
+#define glf3_destroy1(g3) do { free((g3)->indel_seq[0]); free((g3)->indel_seq[1]); free(g3); } while (0)
+
+ glf3_header_t *glf3_header_init();
+ glf3_header_t *glf3_header_read(glfFile fp);
+ void glf3_header_write(glfFile fp, const glf3_header_t *h);
+ void glf3_header_destroy(glf3_header_t *h);
+ char *glf3_ref_read(glfFile fp, int *len);
+ void glf3_ref_write(glfFile fp, const char *name, int len);
+ int glf3_write1(glfFile fp, const glf3_t *g3);
+ int glf3_read1(glfFile fp, glf3_t *g3);
#ifdef __cplusplus
}