X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bcftools%2Fbcf.c;h=3393aad1edef67c24c548dbc290b15ac02c14caf;hb=221f82f662dd770a17d1bd2181de46b8c3b3fdfd;hp=f6c26f2db2e248c05ffefe34642a5a68b33ae95b;hpb=94027d8bf194b5e62b98d8a2b240a3ab1c439b46;p=samtools.git diff --git a/bcftools/bcf.c b/bcftools/bcf.c index f6c26f2..3393aad 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -109,12 +109,18 @@ int bcf_sync(int n_smpl, bcf1_t *b) b->ref = b->alt = b->flt = b->info = b->fmt = 0; for (p = b->str, n = 0; p < b->str + b->l_str; ++p) if (*p == 0 && p+1 != b->str + b->l_str) tmp[n++] = p + 1; - if (n != 5) return -1; + if (n != 5) { + fprintf(stderr, "[bcf_sync] incorrect number of fields (%d != 5)\n", n); + return -1; + } b->ref = tmp[0]; b->alt = tmp[1]; b->flt = tmp[2]; b->info = tmp[3]; b->fmt = tmp[4]; // set n_alleles - for (p = b->alt, n = 1; *p; ++p) - if (*p == ',') ++n; - b->n_alleles = n + 1; + if (*b->alt == 0) b->n_alleles = 1; + else { + for (p = b->alt, n = 1; *p; ++p) + if (*p == ',') ++n; + b->n_alleles = n + 1; + } // set n_gi and gi[i].fmt for (p = b->fmt, n = 1; *p; ++p) if (*p == ':') ++n; @@ -137,7 +143,7 @@ int bcf_sync(int n_smpl, bcf1_t *b) } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) { b->gi[i].len = 1; } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { - b->gi[i].len = 4; + b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2 * 4; } b->gi[i].data = realloc(b->gi[i].data, n_smpl * b->gi[i].len); } @@ -146,13 +152,12 @@ int bcf_sync(int n_smpl, bcf1_t *b) int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b) { - uint32_t x; int i, l = 0; if (b == 0) return -1; bgzf_write(bp->fp, &b->tid, 4); bgzf_write(bp->fp, &b->pos, 4); - x = b->qual<<24 | b->l_str; - bgzf_write(bp->fp, &x, 4); + bgzf_write(bp->fp, &b->qual, 4); + bgzf_write(bp->fp, &b->l_str, 4); bgzf_write(bp->fp, b->str, b->l_str); l = 12 + b->l_str; for (i = 0; i < b->n_gi; ++i) { @@ -165,12 +170,11 @@ int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b) int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b) { int i, l = 0; - uint32_t x; if (b == 0) return -1; if (bgzf_read(bp->fp, &b->tid, 4) == 0) return -1; bgzf_read(bp->fp, &b->pos, 4); - bgzf_read(bp->fp, &x, 4); - b->qual = x >> 24; b->l_str = x << 8 >> 8; + bgzf_read(bp->fp, &b->qual, 4); + bgzf_read(bp->fp, &b->l_str, 4); if (b->l_str > b->m_str) { b->m_str = b->l_str; kroundup32(b->m_str); @@ -204,41 +208,67 @@ static inline void fmt_str(const char *p, kstring_t *s) else kputs(p, s); } -char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b) +void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s) { - kstring_t s; int i, j, x; - memset(&s, 0, sizeof(kstring_t)); - kputs(h->ns[b->tid], &s); kputc('\t', &s); - kputw(b->pos + 1, &s); kputc('\t', &s); - fmt_str(b->str, &s); kputc('\t', &s); - fmt_str(b->ref, &s); kputc('\t', &s); - fmt_str(b->alt, &s); kputc('\t', &s); - kputw(b->qual, &s); kputc('\t', &s); - fmt_str(b->flt, &s); kputc('\t', &s); - fmt_str(b->info, &s); + s->l = 0; + if (h->n_ref) kputs(h->ns[b->tid], s); + else kputw(b->tid, s); + kputc('\t', s); + kputw(b->pos + 1, s); kputc('\t', s); + fmt_str(b->str, s); kputc('\t', s); + fmt_str(b->ref, s); kputc('\t', s); + fmt_str(b->alt, s); kputc('\t', s); + ksprintf(s, "%.3g", b->qual); /*kputw(b->qual, s);*/ kputc('\t', s); + fmt_str(b->flt, s); kputc('\t', s); + fmt_str(b->info, s); if (b->fmt[0]) { - kputc('\t', &s); - fmt_str(b->fmt, &s); + kputc('\t', s); + fmt_str(b->fmt, s); } x = b->n_alleles * (b->n_alleles + 1) / 2; + if (b->n_gi == 0) return; for (j = 0; j < h->n_smpl; ++j) { - kputc('\t', &s); + kputc('\t', s); for (i = 0; i < b->n_gi; ++i) { - if (i) kputc(':', &s); + if (i) kputc(':', s); if (b->gi[i].fmt == bcf_str2int("PL", 2)) { uint8_t *d = (uint8_t*)b->gi[i].data + j * x; int k; for (k = 0; k < x; ++k) { - if (k > 0) kputc(',', &s); - kputw(d[k], &s); + if (k > 0) kputc(',', s); + kputw(d[k], s); } } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) { - kputw(((uint16_t*)b->gi[i].data)[j], &s); + kputw(((uint16_t*)b->gi[i].data)[j], s); } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) { - kputw(((uint8_t*)b->gi[i].data)[j], &s); + kputw(((uint8_t*)b->gi[i].data)[j], s); + } else if (b->gi[i].fmt == bcf_str2int("GT", 2)) { + int y = ((uint8_t*)b->gi[i].data)[j]; + if (y>>7&1) { + kputsn("./.", 3, s); + } else { + kputc('0' + (y>>3&7), s); + kputc("/|"[y>>6&1], s); + kputc('0' + (y&7), s); + } + } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) { + float *d = (float*)b->gi[i].data + j * x; + int k; + //printf("- %lx\n", d); + for (k = 0; k < x; ++k) { + if (k > 0) kputc(',', s); + ksprintf(s, "%.2f", d[k]); + } } } } +} + +char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b) +{ + kstring_t s; + s.l = s.m = 0; s.s = 0; + bcf_fmt_core(h, b, &s); return s.s; }