+/*
+ * Move the GT field to the front of the FORMAT of record b.
+ *
+ * The FORMAT string b->fmt is rewritten so that it starts with "GT:", and the
+ * b->gi entry whose fmt key equals bcf_str2int("GT", 2) is moved to b->gi[0];
+ * the entries that preceded it are shifted up by one, keeping their order.
+ *
+ * Nothing is changed when b->fmt holds no ":GT" field (GT absent or already
+ * the first field) or when no b->gi entry carries the GT key. Longer keys
+ * that merely start with GT (e.g. "AB:GTX:GT") are skipped during the search.
+ *
+ * Always returns 0.
+ */
+int bcf_fix_gt(bcf1_t *b)
+{
+    char *s;
+    int i;
+    uint32_t tmp;
+    bcf_ginfo_t gt;
+    // find a ":GT" that is a complete field, i.e. followed by ':' or by the
+    // end of the string; a longer key such as ":GTX" does not end the search
+    for (s = strstr(b->fmt, ":GT"); s != 0; s = strstr(s + 1, ":GT"))
+        if (s[3] == '\0' || s[3] == ':') break;
+    if (s == 0) return 0; // no GT or GT is already the first
+    tmp = bcf_str2int("GT", 2);
+    for (i = 0; i < b->n_gi; ++i)
+        if (b->gi[i].fmt == tmp) break;
+    if (i == b->n_gi) return 0; // no GT in b->gi; probably a bug...
+    gt = b->gi[i];
+    // move GT to the first
+    for (; i > 0; --i) b->gi[i] = b->gi[i-1];
+    b->gi[0] = gt;
+    // Shift only the s - b->fmt bytes that precede ":GT" right by three.
+    // The byte following "GT" (':' or the terminating '\0') must stay where
+    // it is: copying one byte more would overwrite the terminator with ':'
+    // whenever GT is the last field.
+    memmove(b->fmt + 3, b->fmt, s - b->fmt);
+    b->fmt[0] = 'G'; b->fmt[1] = 'T'; b->fmt[2] = ':';
+    return 0;
+}
+
+/* Return the data pointer of the b->gi entry whose fmt key equals
+ * bcf_str2int(fmt, l), or 0 when no entry of b->gi matches. */
+static void *locate_field(const bcf1_t *b, const char *fmt, int l)
+{
+    const uint32_t key = bcf_str2int(fmt, l);
+    int idx = 0;
+    // advance to the first entry carrying the key, or to the end of b->gi
+    while (idx < b->n_gi && b->gi[idx].fmt != key) ++idx;
+    if (idx == b->n_gi) return 0;
+    return b->gi[idx].data;
+}
+
+/*
+ * Append "MXSP=<n>;MXGQ=<n>" to the INFO of record b (preceded by ';' when
+ * the INFO string is not empty).
+ *
+ * MXGQ is the largest GQ value and MXSP the largest SP value over the samples
+ * whose GT byte has any of its low six bits set; either stays 0 when the
+ * corresponding field is absent. MXSP is then reduced by
+ * (int)(4.343 * log(n_het) + .499) and clamped at 0, where n_het counts the
+ * samples with exactly one of the two 3-bit allele codes equal to zero (and
+ * GQ >= 20 when GQ is present). Since 4.343 ~= 10/ln(10), the penalty is
+ * roughly 10*log10(n_het).
+ *
+ * Returns -1 when b has no GT field, 0 otherwise.
+ */
+int bcf_anno_max(bcf1_t *b)
+{
+    int smp, best_sp = 0, best_gq = 0, het_cnt = 0;
+    kstring_t out;
+    uint8_t *gt = locate_field(b, "GT", 2);
+    uint8_t *gq;
+    int32_t *sp;
+    if (gt == 0) return -1;
+    gq = locate_field(b, "GQ", 2);
+    sp = locate_field(b, "SP", 2);
+    for (smp = 0; smp < b->n_smpl; ++smp) {
+        const int a1 = gt[smp] & 7, a2 = gt[smp] >> 3 & 7;
+        if (gt[smp] & 0x3f) { // only these samples contribute to the maxima
+            if (sp && (int)sp[smp] > best_sp) best_sp = sp[smp];
+            if (gq && (int)gq[smp] > best_gq) best_gq = gq[smp];
+        }
+        if ((a1 == 0) != (a2 == 0)) { // a het: exactly one allele code is zero
+            if (gq == 0 || gq[smp] >= 20) ++het_cnt;
+        }
+    }
+    // log() is only evaluated for a positive count
+    if (het_cnt) best_sp -= (int)(4.343 * log(het_cnt) + .499);
+    if (best_sp < 0) best_sp = 0;
+    memset(&out, 0, sizeof(kstring_t));
+    if (b->info[0] != '\0') kputc(';', &out);
+    ksprintf(&out, "MXSP=%d;MXGQ=%d", best_sp, best_gq);
+    bcf_append_info(b, out.s, out.l);
+    free(out.s);
+    return 0;
+}
+
+/*
+ * Build a copy of header h0 restricted to the samples named in
+ * samples[0..n-1].
+ *
+ * Samples are kept in the order in which they appear in h0. For every
+ * retained sample its index in h0 is stored in list[], so list[] is filled in
+ * ascending order; it needs room for one int per retained sample. When fewer
+ * than n samples are retained, a warning with the difference is printed on
+ * stderr.
+ *
+ * Returns a newly allocated header, or 0 if a memory allocation fails.
+ */
+bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list)
+{
+    int i, ret, j;
+    bcf_hdr_t *h;
+    khash_t(str2id) *hash;
+    kstring_t s;
+    s.l = s.m = 0; s.s = 0;
+    // put the requested names into a hash set for the membership test below
+    hash = kh_init(str2id);
+    if (hash == 0) return 0;
+    for (i = 0; i < n; ++i)
+        kh_put(str2id, hash, samples[i], &ret);
+    // walk h0 in order; s accumulates the retained names, each followed by '\0'
+    for (i = j = 0; i < h0->n_smpl; ++i) {
+        if (kh_get(str2id, hash, h0->sns[i]) != kh_end(hash)) {
+            list[j++] = i;
+            kputs(h0->sns[i], &s); kputc('\0', &s);
+        }
+    }
+    if (j < n) fprintf(stderr, "<%s> %d samples in the list but not in BCF.\n", __func__, n - j);
+    kh_destroy(str2id, hash);
+    h = calloc(1, sizeof(bcf_hdr_t));
+    if (h == 0) { free(s.s); return 0; }
+    *h = *h0;
+    // the struct copy left ns/sns pointing at h0's arrays; clear them before
+    // bcf_hdr_sync() runs (presumably it rebuilds them for h -- confirm)
+    h->ns = 0; h->sns = 0;
+    // assign both buffers before checking so the cleanup never frees h0's pointers
+    h->name = malloc(h->l_nm);
+    h->txt = calloc(1, h->l_txt + 1);
+    if ((h->name == 0 && h->l_nm > 0) || h->txt == 0) {
+        free(h->name); free(h->txt); free(h); free(s.s);
+        return 0;
+    }
+    if (h->l_nm > 0) memcpy(h->name, h0->name, h->l_nm);
+    if (h->l_txt > 0) memcpy(h->txt, h0->txt, h->l_txt);
+    h->l_smpl = s.l; h->sname = s.s; // h takes over the name buffer
+    bcf_hdr_sync(h);
+    return h;
+}
+
+/*
+ * Keep only the samples list[0..n_smpl-1] in record b.
+ *
+ * For every b->gi field, the gi->len bytes of sample list[i] are copied into
+ * slot i of the same data buffer; afterwards b->n_smpl is set to n_smpl.
+ * The copy is done in place from the lowest slot upwards, which is why list
+ * MUST BE sorted: with ascending entries a source slot is never overwritten
+ * before it has been read.
+ *
+ * Always returns 0.
+ */
+int bcf_subsam(int n_smpl, int *list, bcf1_t *b) // list MUST BE sorted
+{
+    int fld, dst;
+    for (fld = 0; fld < b->n_gi; ++fld) {
+        bcf_ginfo_t *gi = &b->gi[fld];
+        uint8_t *base = (uint8_t*)gi->data;
+        for (dst = 0; dst < n_smpl; ++dst) {
+            const int src = list[dst];
+            if (src == dst) continue; // sample is already in its final slot
+            memcpy(base + dst * gi->len, base + src * gi->len, gi->len);
+        }
+    }
+    b->n_smpl = n_smpl;
+    return 0;
+}