/* Instantiate the khash map type named `str2id`: C-string keys, int values. */
6 KHASH_MAP_INIT_STR(str2id, int)
/*
 * Build a str2id table from header h: a fresh table is created and every
 * name h->ns[i], for i in [0, h->n_ref), is inserted as a key.
 *
 * NOTE(review): the statement that stores the value for each key and the
 * return statement are not part of this excerpt -- presumably each name maps
 * to its index i and the table is returned as an opaque pointer; confirm.
 */
8 void *bcf_build_refhash(bcf_hdr_t *h)
10 khash_t(str2id) *hash;
12 hash = kh_init(str2id);
13 for (i = 0; i < h->n_ref; ++i) {
/* The status written to ret by kh_put is not inspected on the visible lines. */
15 k = kh_put(str2id, hash, h->ns[i], &ret); // FIXME: check ret
/* Create an empty str2id table and hand it back as an opaque pointer. */
21 void *bcf_str2id_init()
23 return kh_init(str2id);
/*
 * Destroy a str2id table. A NULL handle is accepted and ignored.
 * Only the table itself is released; the key strings are not freed here.
 */
26 void bcf_str2id_destroy(void *_hash)
28 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
29 if (hash) kh_destroy(str2id, hash); // Note that strings are not freed.
/*
 * Look up str in the str2id table behind _hash.
 * Returns the int stored for that key, or -1 when the key is not present.
 *
 * NOTE(review): no NULL check on _hash is visible in this excerpt; confirm
 * whether one exists on the lines that are not shown.
 */
32 int bcf_str2id(void *_hash, const char *str)
34 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
37 k = kh_get(str2id, hash, str);
/* kh_end(hash) is the "not found" iterator value. */
38 return k == kh_end(hash)? -1 : kh_val(hash, k);
/*
 * Insert str into the str2id table if it is not there yet and return its id.
 *
 * A key that already exists keeps its stored id, which is returned as is.
 * A new key is given the id kh_size(hash) - 1, i.e. the number of entries
 * after the insertion minus one.
 *
 * NOTE(review): the pointer str itself is handed to kh_put, so the string
 * presumably has to outlive the table; confirm against the callers.
 */
41 int bcf_str2id_add(void *_hash, const char *str)
45 khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
47 k = kh_put(str2id, hash, str, &ret);
/* ret == 0: the key was already in the table. */
48 if (ret == 0) return kh_val(hash, k);
49 kh_val(hash, k) = kh_size(hash) - 1;
50 return kh_val(hash, k);
/*
 * Reduce record b to n alleles and shrink its PL field accordingly.
 * Returns -1 without touching b when b->n_alleles <= n.
 *
 * Visible steps:
 *   1. cut b->alt after its first n-1 comma-separated entries (or empty it
 *      in the else branch);
 *   2. close the gap in the string buffer by moving everything from b->flt
 *      to the end of b->str down to p, and shorten b->l_str by that amount;
 *   3. build an index map z with n*(n+1)/2 entries;
 *   4. for every PL field, compact each sample's values in place using z.
 *
 * NOTE(review): several lines are absent from this excerpt -- the branch
 * opened before the comma scan (matched by "} else"), whatever adjusts p
 * before the memmove, and everything after the PL loop. Read the full
 * function before relying on this description.
 */
53 int bcf_shrink_alt(bcf1_t *b, int n)
56 int i, j, k, *z, n_smpl = b->n_smpl;
57 if (b->n_alleles <= n) return -1;
/* Stop on the comma that ends the (n-1)-th ALT entry. */
59 for (p = b->alt, k = 1; *p; ++p)
60 if (*p == ',' && ++k == n) break;
62 } else p = b->alt, *p = '\0';
64 memmove(p, b->flt, b->str + b->l_str - b->flt);
65 b->l_str -= b->flt - p;
/* Room for n*(n+1)/2 ints, taken with alloca; the size is not bounded here. */
66 z = alloca(sizeof(int) / 2 * n * (n+1));
/*
 * z[k] is the offset, inside one sample's old PL block, of the k-th value
 * to keep. Row i contributes n-i entries starting at i * b->n_alleles.
 * NOTE(review): the largest offset is (n-1) * b->n_alleles, while a sample's
 * block holds x = n_alleles*(n_alleles+1)/2 values (see below); for some
 * n / n_alleles pairs that offset is >= x. Verify against the PL layout.
 */
67 for (i = k = 0; i < n; ++i)
68 for (j = 0; j < n - i; ++j)
69 z[k++] = i * b->n_alleles + j;
70 for (i = 0; i < b->n_gi; ++i) {
71 bcf_ginfo_t *g = b->gi + i;
72 if (g->fmt == bcf_str2int("PL", 2)) {
/* x: values per sample before shrinking; g->len: values per sample after. */
73 int l, x = b->n_alleles * (b->n_alleles + 1) / 2;
74 uint8_t *d = (uint8_t*)g->data;
75 g->len = n * (n + 1) / 2;
/* k runs over the compacted output; dl points at sample l's old block. */
76 for (l = k = 0; l < n_smpl; ++l) {
77 uint8_t *dl = d + l * x;
78 for (j = 0; j < g->len; ++j) d[k++] = dl[z[j]];
/*
 * Convert the GL field of record b into a PL field, reusing the same buffer.
 * Returns -1 when b->fmt already contains the substring "PL" or contains no
 * "GL".
 *
 * On the visible lines the matching field is re-tagged as PL, its per-sample
 * length is divided by 4, and each float v is turned into
 * (int)(-10 * v + .499) with an upper cap of 255.
 *
 * NOTE(review): the lines that pick g from the search loop, rewrite the FMT
 * text at p, store the result through d1 and apply any lower clamp are not
 * part of this excerpt; confirm them in the full source.
 */
87 int bcf_gl2pl(bcf1_t *b)
90 int i, n_smpl = b->n_smpl;
94 if (strstr(b->fmt, "PL")) return -1;
95 if ((p = strstr(b->fmt, "GL")) == 0) return -1;
/* Find the entry of b->gi whose format code is GL. */
97 for (i = 0; i < b->n_gi; ++i)
98 if (b->gi[i].fmt == bcf_str2int("GL", 2))
101 g->fmt = bcf_str2int("PL", 2);
102 g->len /= 4; // 4 == sizeof(float)
/* d0 and d1 alias the same buffer: floats are read, bytes are the target. */
103 d0 = (float*)g->data; d1 = (uint8_t*)g->data;
104 for (i = 0; i < n_smpl * g->len; ++i) {
105 int x = (int)(-10. * d0[i] + .499);
106 if (x > 255) x = 255;
112 /* FIXME: this function will fail given AB:GTX:GT. BCFtools never
113 * produces such a FMT, but other tools may. */
/*
 * Move GT to the front of record b when it is present but not first, both in
 * the b->gi array and in the b->fmt string. Every visible early exit
 * returns 0.
 *
 * The search is for ":GT", so a GT that is already the first field does not
 * match and the record is left alone.
 *
 * NOTE(review): some lines are absent from this excerpt, including whatever
 * saves the GT entry before the shift below overwrites it and puts it into
 * b->gi[0] afterwards.
 */
114 int bcf_fix_gt(bcf1_t *b)
120 // check the presence of the GT FMT
121 if ((s = strstr(b->fmt, ":GT")) == 0) return 0; // no GT or GT is already the first
122 if (s[3] != '\0' && s[3] != ':') return 0; // :GTX in fact
123 tmp = bcf_str2int("GT", 2);
124 for (i = 0; i < b->n_gi; ++i)
125 if (b->gi[i].fmt == tmp) break;
126 if (i == b->n_gi) return 0; // no GT in b->gi; probably a bug...
128 // move GT to the first
/* Shift entries 0..i-1 up by one slot, freeing slot 0. */
129 for (; i > 0; --i) b->gi[i] = b->gi[i-1];
/*
 * Shift the text before ":GT" right by three bytes to make room for "GT:".
 * NOTE(review): s + 1 - b->fmt bytes are copied to b->fmt + 3, so the last
 * byte written lands on s[3]. When GT is the final field, s[3] is the
 * terminating '\0' (the check above allows that) and it would be replaced
 * by ':'. A count of s - b->fmt looks sufficient -- verify.
 */
131 memmove(b->fmt + 3, b->fmt, s + 1 - b->fmt);
132 b->fmt[0] = 'G'; b->fmt[1] = 'T'; b->fmt[2] = ':';
/*
 * Return the data pointer of the first entry in b->gi whose format code
 * equals bcf_str2int(fmt, l), or 0 when no entry matches.
 */
136 static void *locate_field(const bcf1_t *b, const char *fmt, int l)
140 tmp = bcf_str2int(fmt, l);
141 for (i = 0; i < b->n_gi; ++i)
142 if (b->gi[i].fmt == tmp) break;
143 return i == b->n_gi? 0 : b->gi[i].data;
/*
 * Append "MXSP=<max_sp>;MXGQ=<max_gq>" to the INFO of record b.
 * Returns -1 when b has no GT field.
 *
 * max_sp and max_gq are the largest SP and GQ bytes over the b->n_smpl
 * samples. A sample counts as a het when exactly one of the two 3-bit codes
 * packed in its GT byte is zero, and either there is no GQ field or its GQ
 * is at least 20. With n_het > 0, max_sp is lowered by
 * (int)(4.343 * log(n_het) + .499) and then floored at 0; 4.343 * log(x) is
 * approximately 10 * log10(x).
 *
 * NOTE(review): the guards that skip the SP / GQ loops when those fields are
 * missing, and the end of the function (cleanup of str, return value), are
 * not part of this excerpt; confirm them in the full source.
 */
146 int bcf_anno_max(bcf1_t *b)
148 int k, max_gq, max_sp, n_het;
150 uint8_t *gt, *gq, *sp;
151 max_gq = max_sp = n_het = 0;
152 gt = locate_field(b, "GT", 2);
153 if (gt == 0) return -1;
/* gq and sp may be 0 when the record lacks those fields. */
154 gq = locate_field(b, "GQ", 2);
155 sp = locate_field(b, "SP", 2);
157 for (k = 0; k < b->n_smpl; ++k)
159 max_sp = max_sp > (int)sp[k]? max_sp : sp[k];
161 for (k = 0; k < b->n_smpl; ++k)
163 max_gq = max_gq > (int)gq[k]? max_gq : gq[k];
164 for (k = 0; k < b->n_smpl; ++k) {
/* Low three bits and the next three bits of the GT byte. */
166 a1 = gt[k]&7; a2 = gt[k]>>3&7;
167 if ((!a1 && a2) || (!a2 && a1)) { // a het
168 if (gq == 0) ++n_het;
169 else if (gq[k] >= 20) ++n_het;
172 if (n_het) max_sp -= (int)(4.343 * log(n_het) + .499);
173 if (max_sp < 0) max_sp = 0;
174 memset(&str, 0, sizeof(kstring_t));
/* Separate from existing INFO content only when there is some. */
175 if (*b->info) kputc(';', &str);
176 ksprintf(&str, "MXSP=%d;MXGQ=%d", max_sp, max_gq);
177 bcf_append_info(b, str.s, str.l);