From 23e43ef6778e51eb2a71046282d1c21431520857 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 13 Jul 2011 13:50:16 +0000 Subject: [PATCH] support trio indels --- bcftools/bcf.h | 4 +++- bcftools/bcfutils.c | 26 ++++++++++++++++++++++++-- bcftools/call1.c | 2 +- bcftools/mut.c | 5 ++++- ksort.h | 4 ---- 5 files changed, 32 insertions(+), 9 deletions(-) diff --git a/bcftools/bcf.h b/bcftools/bcf.h index aeae6ca..822ae5c 100644 --- a/bcftools/bcf.h +++ b/bcftools/bcf.h @@ -28,7 +28,7 @@ #ifndef BCF_H #define BCF_H -#define BCF_VERSION "0.1.17 (r973:277)" +#define BCF_VERSION "0.1.17-dev (r973:277)" #include #include @@ -154,6 +154,8 @@ extern "C" { int bcf_fix_pl(bcf1_t *b); // convert PL to GLF-like 10-likelihood GL int bcf_gl10(const bcf1_t *b, uint8_t *gl); + // convert up to 4 INDEL alleles to GLF-like 10-likelihood GL + int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl); // string hash table void *bcf_build_refhash(bcf_hdr_t *h); diff --git a/bcftools/bcfutils.c b/bcftools/bcfutils.c index 9608846..0eab4c1 100644 --- a/bcftools/bcfutils.c +++ b/bcftools/bcfutils.c @@ -354,8 +354,6 @@ int bcf_gl10(const bcf1_t *b, uint8_t *gl) for (i = 0; i < b->n_smpl; ++i) { const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual uint8_t *g = gl + 10 * i; - for (j = 0; j < PL->len; ++j) - if (p[j]) break; for (k = j = 0; k < 4; ++k) { for (l = k; l < 4; ++l) { int t, x = map[k], y = map[l]; @@ -366,3 +364,27 @@ int bcf_gl10(const bcf1_t *b, uint8_t *gl) } return 0; } + +int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl) +{ + int k, l, j, i; + const bcf_ginfo_t *PL; + if (b->alt[0] == 0) return -1; // no alternate allele + for (i = 0; i < b->n_gi; ++i) + if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; + if (i == b->n_gi) return -1; // no PL + PL = b->gi + i; + for (i = 0; i < b->n_smpl; ++i) { + const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual + uint8_t *g = gl + 10 * i; + for (k = j = 0; k < 4; ++k) { + for (l = k; l < 4; ++l) { + int t, x = k, y = l; + if (x > y) t = x, x = y, y = t; // make sure x is the smaller + x = y * (y+1) / 2 + x; + g[j++] = x < PL->len? p[x] : 255; + } + } + } + return 0; +} diff --git a/bcftools/call1.c b/bcftools/call1.c index 69bd5f7..b0baee2 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -495,7 +495,7 @@ int bcfview(int argc, char *argv[]) if (!(l > begin && end > b->pos)) continue; } ++n_processed; - if (vc.flag & VC_QCNT) { // summarize the difference + if ((vc.flag & VC_QCNT) && !is_indel) { // summarize the difference int x = bcf_min_diff(b); if (x > 255) x = 255; if (x >= 0) ++qcnt[x]; diff --git a/bcftools/mut.c b/bcftools/mut.c index 4b7cd10..15ef265 100644 --- a/bcftools/mut.c +++ b/bcftools/mut.c @@ -33,6 +33,7 @@ uint32_t *bcf_trio_prep(int is_x, int is_son) return ret; } + int bcf_trio_call(const uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt) { int i, j, k; @@ -44,7 +45,9 @@ int bcf_trio_call(const uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt) if (b->gi[i].fmt == bcf_str2int("PL", 2)) break; if (i == b->n_gi) return -1; // no PL gl10 = alloca(10 * b->n_smpl); - if (bcf_gl10(b, gl10) < 0) return -1; + if (bcf_gl10(b, gl10) < 0) { + if (bcf_gl10_indel(b, gl10) < 0) return -1; + } PL = b->gi + i; for (i = 0, k = 0; i < 4; ++i) for (j = i; j < 4; ++j) diff --git a/ksort.h b/ksort.h index b8b62f1..fa850ab 100644 --- a/ksort.h +++ b/ksort.h @@ -60,10 +60,6 @@ #include #include -#ifdef _WIN32 -#define drand48() ((double)rand() / RAND_MAX) -#endif - typedef struct { void *left, *right; int depth; -- 2.39.2