if (hash) kh_destroy(str2id, hash); // Note that strings are not freed.
}
+void bcf_str2id_thorough_destroy(void *_hash)
+{
+ khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
+ khint_t k;
+ if (hash == 0) return;
+ for (k = 0; k < kh_end(hash); ++k)
+ if (kh_exist(hash, k)) free((char*)kh_key(hash, k));
+ kh_destroy(str2id, hash);
+}
+
int bcf_str2id(void *_hash, const char *str)
{
khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
int bcf_shrink_alt(bcf1_t *b, int n)
{
char *p;
- int i, j, k, *z, n_smpl = b->n_smpl;
+ int i, j, k, n_smpl = b->n_smpl;
if (b->n_alleles <= n) return -1;
+ // update ALT
if (n > 1) {
for (p = b->alt, k = 1; *p; ++p)
if (*p == ',' && ++k == n) break;
++p;
memmove(p, b->flt, b->str + b->l_str - b->flt);
b->l_str -= b->flt - p;
- z = alloca(sizeof(int) / 2 * n * (n+1));
- for (i = k = 0; i < n; ++i)
- for (j = 0; j < n - i; ++j)
- z[k++] = i * b->n_alleles + j;
+ // update PL
for (i = 0; i < b->n_gi; ++i) {
bcf_ginfo_t *g = b->gi + i;
if (g->fmt == bcf_str2int("PL", 2)) {
g->len = n * (n + 1) / 2;
for (l = k = 0; l < n_smpl; ++l) {
uint8_t *dl = d + l * x;
- for (j = 0; j < g->len; ++j) d[k++] = dl[z[j]];
+ for (j = 0; j < g->len; ++j) d[k++] = dl[j];
}
} // FIXME: to add GL
}
{
int k, max_gq, max_sp, n_het;
kstring_t str;
- uint8_t *gt, *gq, *sp;
+ uint8_t *gt, *gq;
+ int32_t *sp;
max_gq = max_sp = n_het = 0;
gt = locate_field(b, "GT", 2);
if (gt == 0) return -1;
free(str.s);
return 0;
}
+
+bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list)
+{
+ int i, ret, j;
+ khint_t k;
+ bcf_hdr_t *h;
+ khash_t(str2id) *hash;
+ kstring_t s;
+ s.l = s.m = 0; s.s = 0;
+ hash = kh_init(str2id);
+ for (i = 0; i < n; ++i)
+ k = kh_put(str2id, hash, samples[i], &ret);
+ for (i = j = 0; i < h0->n_smpl; ++i) {
+ if (kh_get(str2id, hash, h0->sns[i]) != kh_end(hash)) {
+ list[j++] = i;
+ kputs(h0->sns[i], &s); kputc('\0', &s);
+ }
+ }
+ if (j < n) fprintf(stderr, "<%s> %d samples in the list but not in BCF.", __func__, n - j);
+ kh_destroy(str2id, hash);
+ h = calloc(1, sizeof(bcf_hdr_t));
+ *h = *h0;
+ h->ns = 0; h->sns = 0;
+ h->name = malloc(h->l_nm); memcpy(h->name, h0->name, h->l_nm);
+ h->txt = calloc(1, h->l_txt + 1); memcpy(h->txt, h0->txt, h->l_txt);
+ h->l_smpl = s.l; h->sname = s.s;
+ bcf_hdr_sync(h);
+ return h;
+}
+
+int bcf_subsam(int n_smpl, int *list, bcf1_t *b) // list MUST BE sorted
+{
+ int i, j;
+ for (j = 0; j < b->n_gi; ++j) {
+ bcf_ginfo_t *gi = b->gi + j;
+ for (i = 0; i < n_smpl; ++i)
+ if (i != list[i]) memcpy((uint8_t*)gi->data + i * gi->len, (uint8_t*)gi->data + list[i] * gi->len, gi->len);
+ }
+ b->n_smpl = n_smpl;
+ return 0;
+}