X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;ds=sidebyside;f=bcftools%2Fbcfutils.c;h=7988e580db1fde1d9d2f23d2cd6485b91e1d4126;hb=ec294adf095b60c90e57e31f3af1335138c5a22a;hp=0eab4c1f322b398750fdda9da4caec6eea8e7579;hpb=700d451b57c06a5fc842b24be0d69d4a251c91df;p=samtools.git diff --git a/bcftools/bcfutils.c b/bcftools/bcfutils.c index 0eab4c1..7988e58 100644 --- a/bcftools/bcfutils.c +++ b/bcftools/bcfutils.c @@ -1,5 +1,6 @@ #include #include +#include #include "bcf.h" #include "kstring.h" #include "khash.h" @@ -66,6 +67,121 @@ int bcf_str2id_add(void *_hash, const char *str) return kh_val(hash, k); } +void bcf_fit_alt(bcf1_t *b, int mask) +{ + mask |= 1; // REF must be always present + + int i,j,nals=0; + for (i=0; in_alleles <= nals ) return; + +#if DBG + printf("fit_alt: %s, %s, mask=%d, nals=%d: ", b->ref, b->alt, mask, nals); + for (i=0; i1 ) + { + char *dst, *src; + int n=0, nalts=nals-1; + for (src=dst=p=b->alt, i=1; *p; p++) + { + if ( *p!=',' ) continue; + + if ( mask&1<=nalts ) { *dst=0; break; } + src = p+1; + } + if ( nalt, *p = '\0'; +#if DBG + printf("fit_alt: %s, mask=%d\n", b->alt, mask); +#endif + p++; + memmove(p, b->flt, b->str + b->l_str - b->flt); + b->l_str -= b->flt - p; + + // update PL and GT + int ipl=-1, igt=-1; + for (i = 0; i < b->n_gi; ++i) + { + bcf_ginfo_t *g = b->gi + i; + if (g->fmt == bcf_str2int("PL", 2)) ipl = i; + if (g->fmt == bcf_str2int("GT", 2)) igt = i; + } + + // .. create mapping between old and new indexes + int npl = nals * (nals+1) / 2; + int *map = malloc(sizeof(int)*(npl>b->n_alleles ? npl : b->n_alleles)); + int kori=0,knew=0; + for (i=0; in_alleles; i++) + { + for (j=0; j<=i; j++) + { + int skip=0; + if ( i && !(mask&1<n_smpl; + for (i = 0; i < b->n_gi; ++i) + { + bcf_ginfo_t *g = b->gi + i; + if (g->fmt == bcf_str2int("PL", 2)) + { + g->len = npl; + uint8_t *d = (uint8_t*)g->data; + int ismpl, npl_ori = b->n_alleles * (b->n_alleles + 1) / 2; + for (knew=ismpl=0; ismpln_alleles; i++) + map[i] = mask&1<gi[igt].data)[i]; + int a1 = (gt>>3)&7; + int a2 = gt&7; + assert( map[a1]>=0 && map[a2]>=0 ); + ((uint8_t*)b->gi[igt].data)[i] = ((1<<7|1<<6)>) | map[a1]<<3 | map[a2]; + } + free(map); + b->n_alleles = nals; +} + int bcf_shrink_alt(bcf1_t *b, int n) { char *p;