for (i = 0; i < n; ++i)
for (j = 0; j < 4; ++j)
qsum[j] += calls[i].qsum[j];
+ int qsum_tot=0;
+ for (j=0; j<4; j++) qsum_tot += qsum[j];
for (j = 0; j < 4; ++j) qsum[j] = qsum[j] << 2 | j;
// find the top 2 alleles
for (i = 1; i < 4; ++i) // insertion sort
call->a[0] = ref4;
for (i = 3, j = 1; i >= 0; --i) {
if ((qsum[i]&3) != ref4) {
- if (qsum[i]>>2 != 0) call->a[j++] = qsum[i]&3;
+ if (qsum[i]>>2 != 0)
+ {
+ call->qsum[j] = (float)(qsum[i]>>2)/qsum_tot;
+ call->a[j++] = qsum[i]&3;
+ }
else break;
}
+ else
+ call->qsum[0] = (float)(qsum[i]>>2)/qsum_tot;
}
if (ref_base >= 0) { // for SNPs, find the "unseen" base
if (((ref4 < 4 && j < 4) || (ref4 == 4 && j < 5)) && i >= 0)
if (i) kputc(',', &s);
kputw(bc->anno[i], &s);
}
+ ksprintf(&s,";QS=%f,%f,%f,%f", bc->qsum[0],bc->qsum[1],bc->qsum[2],bc->qsum[3]);
if (bc->vdb != 1)
ksprintf(&s, ";VDB=%.4f", bc->vdb);
kputc('\0', &s);
typedef struct {
int a[5]; // alleles: ref, alt, alt2, alt3
+ float qsum[4];
int n, n_alleles, shift, ori_ref, unseen;
int n_supp; // number of supporting non-reference reads
int anno[16], depth, ori_depth;
}
}
for (j = 0; j < h->n_smpl; ++j) {
-
- // Determine GT with maximum PL (multiple ALT sites only)
- int imax=-1;
- if ( iPL!=-1 ) {
- uint8_t *d = (uint8_t*)b->gi[iPL].data + j * x;
- int k,identical=1;
- imax=0;
- for (k=1; k<x; k++)
- {
- if ( identical && d[k]!=d[k-1] ) identical = 0;
- if ( d[k]<d[imax] ) imax = k;
- }
- // If all lks are identical, leave GT untouched
- if ( identical ) imax = -1;
- }
+ int ploidy = b->ploidy ? b->ploidy[j] : 2;
kputc('\t', s);
for (i = 0; i < b->n_gi; ++i) {
if (i) kputc(':', s);
if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
uint8_t *d = (uint8_t*)b->gi[i].data + j * x;
int k;
- for (k = 0; k < x; ++k) {
- if (k > 0) kputc(',', s);
- kputw(d[k], s);
- }
+ if ( ploidy==1 )
+ for (k=0; k<b->n_alleles; k++)
+ {
+ if (k>0) kputc(',', s);
+ kputw(d[(k+1)*(k+2)/2-1], s);
+ }
+ else
+ for (k = 0; k < x; ++k) {
+ if (k > 0) kputc(',', s);
+ kputw(d[k], s);
+ }
} else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("DV", 2)) {
kputw(((uint16_t*)b->gi[i].data)[j], s);
} else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
kputw(((int32_t*)b->gi[i].data)[j], s);
} else if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
int y = ((uint8_t*)b->gi[i].data)[j];
- if ( y>>7&1 )
- kputsn("./.", 3, s);
- else if ( imax==-1 )
+ if ( ploidy==1 )
{
- kputc('0' + (y>>3&7), s);
- kputc("/|"[y>>6&1], s);
- kputc('0' + (y&7), s);
+ if ( y>>7&1 )
+ kputsn(".", 3, s);
+ else
+ kputc('0' + (y>>3&7), s);
}
else
{
- // Arguably, the while loop will be faster than two sqrts
- int n = 0;
- int row = 1;
- while ( n<imax )
- {
- row++;
- n += row;
+ if ( y>>7&1 )
+ kputsn("./.", 3, s);
+ else {
+ kputc('0' + (y>>3&7), s);
+ kputc("/|"[y>>6&1], s);
+ kputc('0' + (y&7), s);
}
- row--;
- kputw(imax-n+row, s);
- kputc("/|"[y>>6&1], s);
- kputw(row, s);
}
} else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
float *d = (float*)b->gi[i].data + j * x;
bcf_ginfo_t *gi; // array of geno fields
int n_alleles, n_smpl; // number of alleles and samples
// derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl)
+ uint8_t *ploidy; // ploidy of all samples; if NULL, ploidy of 2 is assumed.
} bcf1_t;
typedef struct {
// keep the first n alleles and discard the rest
int bcf_shrink_alt(bcf1_t *b, int n);
+ // keep the masked alleles and discard the rest
+ void bcf_fit_alt(bcf1_t *b, int mask);
// convert GL to PL
int bcf_gl2pl(bcf1_t *b);
// if the site is an indel
#include <string.h>
#include <math.h>
+#include <assert.h>
#include "bcf.h"
#include "kstring.h"
#include "khash.h"
return kh_val(hash, k);
}
+void bcf_fit_alt(bcf1_t *b, int mask)
+{
+ mask |= 1; // REF must be always present
+
+ int i,j,nals=0;
+ for (i=0; i<sizeof(int); i++)
+ if ( mask&1<<i) nals++;
+
+ if ( b->n_alleles <= nals ) return;
+
+#if DBG
+ printf("fit_alt: %s, %s, mask=%d, nals=%d: ", b->ref, b->alt, mask, nals);
+ for (i=0; i<sizeof(int); i++)
+ if ( mask&1<<i) printf(" %d", i);
+ printf("\n");
+#endif
+ // update ALT, in principle any of the alleles can be removed
+ char *p;
+ if ( nals>1 )
+ {
+ char *dst, *src;
+ int n=0, nalts=nals-1;
+ for (src=dst=p=b->alt, i=1; *p; p++)
+ {
+ if ( *p!=',' ) continue;
+
+ if ( mask&1<<i )
+ {
+ n++;
+ if ( src!=dst )
+ {
+ memmove(dst,src,p-src);
+ dst += p-src;
+ }
+ else dst = p;
+ if ( n<nalts ) { *dst=','; dst++; }
+ }
+ i++;
+
+ if ( n>=nalts ) { *dst=0; break; }
+ src = p+1;
+ }
+ if ( n<nalts )
+ {
+ memmove(dst,src,p-src);
+ dst += p-src;
+ *dst = 0;
+ }
+ p = dst;
+ }
+ else p = b->alt, *p = '\0';
+#if DBG
+ printf("fit_alt: %s, mask=%d\n", b->alt, mask);
+#endif
+ p++;
+ memmove(p, b->flt, b->str + b->l_str - b->flt);
+ b->l_str -= b->flt - p;
+
+ // update PL and GT
+ int ipl=-1, igt=-1;
+ for (i = 0; i < b->n_gi; ++i)
+ {
+ bcf_ginfo_t *g = b->gi + i;
+ if (g->fmt == bcf_str2int("PL", 2)) ipl = i;
+ if (g->fmt == bcf_str2int("GT", 2)) igt = i;
+ }
+
+ // .. create mapping between old and new indexes
+ int npl = nals * (nals+1) / 2;
+ int *map = malloc(sizeof(int)*(npl>b->n_alleles ? npl : b->n_alleles));
+ int kori=0,knew=0;
+ for (i=0; i<b->n_alleles; i++)
+ {
+ for (j=0; j<=i; j++)
+ {
+ int skip=0;
+ if ( i && !(mask&1<<i) ) skip=1;
+ if ( j && !(mask&1<<j) ) skip=1;
+ if ( !skip ) { map[knew++] = kori; }
+ kori++;
+ }
+ }
+ // .. apply to all samples
+ int n_smpl = b->n_smpl;
+ for (i = 0; i < b->n_gi; ++i)
+ {
+ bcf_ginfo_t *g = b->gi + i;
+ if (g->fmt == bcf_str2int("PL", 2))
+ {
+ g->len = npl;
+ uint8_t *d = (uint8_t*)g->data;
+ int ismpl, npl_ori = b->n_alleles * (b->n_alleles + 1) / 2;
+ for (knew=ismpl=0; ismpl<n_smpl; ismpl++)
+ {
+ uint8_t *dl = d + ismpl * npl_ori;
+ for (j=0; j<npl; j++) d[knew++] = dl[map[j]];
+ }
+ } // FIXME: to add GL
+ }
+ // update GTs
+ map[0] = 0;
+ for (i=1, knew=0; i<b->n_alleles; i++)
+ map[i] = mask&1<<i ? ++knew : -1;
+ for (i=0; i<n_smpl; i++)
+ {
+ uint8_t gt = ((uint8_t*)b->gi[igt].data)[i];
+ int a1 = (gt>>3)&7;
+ int a2 = gt&7;
+ assert( map[a1]>=0 && map[a2]>=0 );
+ ((uint8_t*)b->gi[igt].data)[i] = ((1<<7|1<<6)>) | map[a1]<<3 | map[a2];
+ }
+ free(map);
+ b->n_alleles = nals;
+}
+
int bcf_shrink_alt(bcf1_t *b, int n)
{
char *p;
uint32_t *trio_aux;
char *prior_file, **subsam, *fn_dict;
uint8_t *ploidy;
- double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt;
+ double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt, min_ma_lrt;
void *bed;
} viewconf_t;
tid = begin = end = -1;
memset(&vc, 0, sizeof(viewconf_t));
- vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1;
+ vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; vc.min_ma_lrt = -1;
memset(qcnt, 0, 8 * 256);
- while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Yw")) >= 0) {
+ while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
case 'l': vc.bed = bed_read(optarg); break;
case 'w': vc.flag |= VC_INDEL_ONLY; break;
case 'M': vc.flag |= VC_ANNO_MAX; break;
case 'Y': vc.flag |= VC_QCNT; break;
+ case 'm': vc.min_ma_lrt = atof(optarg); break;
case 't': vc.theta = atof(optarg); break;
case 'p': vc.pref = atof(optarg); break;
case 'i': vc.indel_frac = atof(optarg); break;
fprintf(stderr, " -g call genotypes at variant sites (force -c)\n");
fprintf(stderr, " -i FLOAT indel-to-substitution ratio [%.4g]\n", vc.indel_frac);
fprintf(stderr, " -I skip indels\n");
+ fprintf(stderr, " -m FLOAT alternative model for multiallelic and rare-variant calling, include if P(chi^2)>=FLOAT\n");
fprintf(stderr, " -p FLOAT variant if P(ref|D)<FLOAT [%.3g]\n", vc.pref);
fprintf(stderr, " -P STR type of prior: full, cond2, flat [full]\n");
fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4g]\n", vc.theta);
int i;
for (i = 0; i < 9; ++i) em[i] = -1.;
}
- if (vc.flag & VC_CALL) { // call variants
+ bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions
+ if ( !(vc.flag&VC_KEEPALT) && vc.flag&VC_CALL && vc.min_ma_lrt>=0 )
+ {
+ int gts = call_multiallelic_gt(b,p1,vc.min_ma_lrt);
+ if ( gts==0 && vc.flag & VC_VARONLY ) continue;
+ }
+ else if (vc.flag & VC_CALL) { // call variants
bcf_p1rst_t pr;
int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr);
if (n_processed % 100000 == 0) {
#include <stdio.h>
#include <errno.h>
#include <assert.h>
+#include <limits.h>
#include "prob1.h"
+#include "kstring.h"
#include "kseq.h"
KSTREAM_INIT(gzFile, gzread, 16384)
return 0;
}
+void bcf_p1_set_ploidy(bcf1_t *b, bcf_p1aux_t *ma)
+{
+ // bcf_p1aux_t fields are not visible outside of prob1.c, hence this wrapper.
+ // Ideally, this should set ploidy per site to allow pseudo-autosomal regions
+ b->ploidy = ma->ploidy;
+}
+
void bcf_p1_destroy(bcf_p1aux_t *ma)
{
if (ma) {
}
}
-static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
+extern double kf_gammap(double s, double z);
+
+int call_multiallelic_gt(bcf1_t *b, bcf_p1aux_t *ma, double threshold)
{
- int i, j;
- int n = (b->n_alleles+1)*b->n_alleles/2;
- double *lk = alloca(n * sizeof(long));
- memset(lk, 0, sizeof(double) * n);
- for (j = 0; j < ma->n; ++j) {
- const uint8_t *pi = ma->PL + j * ma->PL_len;
- double *pdg = ma->pdg + j * 3;
- pdg[0] = ma->q2p[pi[2]]; pdg[1] = ma->q2p[pi[1]]; pdg[2] = ma->q2p[pi[0]];
- for (i=0; i<n; i++) lk[i] += pi[i];
+ int nals = 1;
+ char *p;
+ for (p=b->alt; *p; p++)
+ {
+ if ( *p=='X' || p[0]=='.' ) break;
+ if ( p[0]==',' ) nals++;
}
+ if ( b->alt[0] && !*p ) nals++;
+
+ if ( nals==1 ) return 1;
- double norm=lk[0];
- for (i=1; i<n; i++) if (lk[i]<norm) norm=lk[i];
- #if DBG
- for (i=0,j=0; i<b->n_alleles; i++)
+ if ( nals>4 ) { fprintf(stderr,"too many alts: %d\n", nals); exit(1); }
+
+ // set PL and PL_len
+ uint8_t *pl = NULL;
+ int npl = 0, idp=-1;
+ int i;
+ for (i = 0; i < b->n_gi; ++i)
+ {
+ if (b->gi[i].fmt == bcf_str2int("PL", 2))
+ {
+ pl = (uint8_t*)b->gi[i].data;
+ npl = b->gi[i].len;
+ }
+ if (b->gi[i].fmt == bcf_str2int("DP", 2)) idp=i;
+ }
+ if ( !pl ) return -1;
+
+ int npdg = nals*(nals+1)/2;
+ float *pdg,*_pdg;
+ _pdg = pdg = malloc(sizeof(float)*ma->n*npdg);
+ for (i=0; i<ma->n; i++)
+ {
+ int j;
+ float sum = 0;
+ for (j=0; j<npdg; j++)
+ {
+ _pdg[j] = pow(10,-0.1*pl[j]);
+ sum += _pdg[j];
+ }
+ if ( sum )
+ for (j=0; j<npdg; j++) _pdg[j] /= sum;
+ _pdg += npdg;
+ pl += npl;
+ }
+
+ if ((p = strstr(b->info, "QS=")) == 0) { fprintf(stderr,"INFO/QS is required with -m, exiting\n"); exit(1); }
+ float qsum[4];
+ if ( sscanf(p+3,"%f,%f,%f,%f",&qsum[0],&qsum[1],&qsum[2],&qsum[3])!=4 ) { fprintf(stderr,"Could not parse %s\n",p); exit(1); }
+
+
+ int ia,ib,ic, max_als=0, max_als2=0;
+ float max_lk = INT_MIN, max_lk2 = INT_MIN, lk_sum = INT_MIN;
+ for (ia=0; ia<nals; ia++)
+ {
+ //if ( ia && qsum[ia]==0 ) continue;
+ float lk_tot = 0;
+ int iaa = (ia+1)*(ia+2)/2-1;
+ int isample;
+ for (isample=0; isample<ma->n; isample++)
+ {
+ float *p = pdg + isample*npdg;
+ assert( log(p[iaa]) <= 0 );
+ lk_tot += log(p[iaa]);
+ }
+ if ( max_lk<lk_tot ) { max_lk2 = max_lk; max_als2 = max_als; max_lk = lk_tot; max_als = 1<<ia; }
+ else if ( max_lk2<lk_tot ) { max_lk2 = lk_tot; max_als2 = 1<<ia; }
+ lk_sum = lk_tot>lk_sum ? lk_tot + log(1+exp(lk_sum-lk_tot)) : lk_sum + log(1+exp(lk_tot-lk_sum));
+ }
+ for (ia=0; ia<nals; ia++)
{
- int k; for (k=0; k<=i; k++) printf("%.0f\t", lk[j++]);
- printf("\n");
+ if ( qsum[ia]==0 ) continue;
+ //if ( ia && qsum[ia]==0 ) continue;
+ for (ib=0; ib<ia; ib++)
+ {
+ if ( qsum[ib]==0 ) continue;
+ //if ( ib && qsum[ib]==0 ) continue;
+ //if ( qsum[ia]+qsum[ib]==0 ) continue;
+ float lk_tot = 0;
+ float fa = qsum[ia]/(qsum[ia]+qsum[ib]);
+ float fb = qsum[ib]/(qsum[ia]+qsum[ib]);
+ float fab = 2*fa*fb; fa *= fa; fb *= fb;
+ int isample, iaa = (ia+1)*(ia+2)/2-1, ibb = (ib+1)*(ib+2)/2-1, iab = iaa - ia + ib;
+ for (isample=0; isample<ma->n; isample++)
+ {
+ if ( b->ploidy && b->ploidy[isample]==1 ) continue;
+ float *p = pdg + isample*npdg;
+ assert( log(fa*p[iaa] + fb*p[ibb] + fab*p[iab]) <= 0 );
+ lk_tot += log(fa*p[iaa] + fb*p[ibb] + fab*p[iab]);
+ }
+ if ( max_lk<lk_tot ) { max_lk2 = max_lk; max_als2 = max_als; max_lk = lk_tot; max_als = 1<<ia|1<<ib; }
+ else if ( max_lk2<lk_tot ) { max_lk2 = lk_tot; max_als2 = 1<<ia|1<<ib; }
+ lk_sum = lk_tot>lk_sum ? lk_tot + log(1+exp(lk_sum-lk_tot)) : lk_sum + log(1+exp(lk_tot-lk_sum));
+ }
}
- #endif
- for (i=0; i<n; i++) lk[i] = pow(10,-0.1*(lk[i]-norm));
-
- // Find out the most likely alleles. In contrast to the original version,
- // ALT alleles may not be printed when they are more likely than REF but
- // significantly less likely than the most likely ALT. The only criterion
- // is the LK ratio now. To obtain behaviour similar to the original one,
- // use the pref variant below.
- double pmax=0; //,pref=0;
- n = ma->is_indel ? b->n_alleles : b->n_alleles-1;
- for (i=0; i<n; i++)
+ for (ia=0; ia<nals; ia++)
{
- double pr=0;
- int k=i*(i+1)/2;
- for (j=0; j<=i; j++) { pr+=lk[k]; k++; }
- for (j=i+1; j<b->n_alleles; j++) { k=j*(j+1)/2+i; pr+=lk[k]; }
- #if DBG
- printf("%d\t%e\n", i,pr);
- #endif
- if (pmax<pr) pmax=pr;
- // if (i==0) pref=pr;
- // if (pr<pref && pr/pmax < 1e-4) break;
- if (pr/pmax < 1e-4) break; // Assuming the alleles are sorted by the lk
+ if ( qsum[ia]==0 ) continue;
+ //if ( ia && qsum[ia]==0 ) continue;
+ for (ib=0; ib<ia; ib++)
+ {
+ if ( qsum[ib]==0 ) continue;
+ //if ( ib && qsum[ib]==0 ) continue;
+ for (ic=0; ic<ib; ic++)
+ {
+ if ( qsum[ic]==0 ) continue;
+ //if ( ic && qsum[ic]==0 ) continue;
+ //if ( qsum[ia]+qsum[ib]+qsum[ic]==0 ) continue;
+ float lk_tot = 0;
+ float fa = qsum[ia]/(qsum[ia]+qsum[ib]+qsum[ic]);
+ float fb = qsum[ib]/(qsum[ia]+qsum[ib]+qsum[ic]);
+ float fc = qsum[ic]/(qsum[ia]+qsum[ib]+qsum[ic]);
+ float fab = 2*fa*fb, fac = 2*fa*fc, fbc = 2*fb*fc; fa *= fa; fb *= fb; fc *= fc;
+ int isample, iaa = (ia+1)*(ia+2)/2-1, ibb = (ib+1)*(ib+2)/2-1, icc = (ic+1)*(ic+2)/2-1;
+ int iab = iaa - ia + ib, iac = iaa - ia + ic, ibc = ibb - ib + ic;
+ for (isample=0; isample<ma->n; isample++)
+ {
+ if ( b->ploidy && b->ploidy[isample]==1 ) continue;
+ float *p = pdg + isample*npdg;
+ assert( log(fa*p[iaa] + fb*p[ibb] + fc*p[icc] + fab*p[iab] + fac*p[iac] + fbc*p[ibc]) <= 0 );
+ lk_tot += log(fa*p[iaa] + fb*p[ibb] + fc*p[icc] + fab*p[iab] + fac*p[iac] + fbc*p[ibc]);
+ }
+ if ( max_lk<lk_tot ) { max_lk2 = max_lk; max_als2 = max_als; max_lk = lk_tot; max_als = 1<<ia|1<<ib|1<<ic; }
+ else if ( max_lk2<lk_tot ) { max_lk2 = lk_tot; max_als2 = 1<<ia|1<<ib|1<<ic; }
+ lk_sum = lk_tot>lk_sum ? lk_tot + log(1+exp(lk_sum-lk_tot)) : lk_sum + log(1+exp(lk_tot-lk_sum));
+ }
+ }
}
- return i-1;
+
+ int n1=0, n2=0;
+ for (i=0; i<nals; i++) if ( max_als&1<<i) n1++;
+ for (i=0; i<nals; i++) if ( max_als2&1<<i) n2++;
+ if ( n2<n1 && kf_gammap(1,2.0*(max_lk-max_lk2))<threshold )
+ {
+ max_lk = max_lk2;
+ max_als = max_als2;
+ }
+
+ // Get the BCF record ready for output
+ kstring_t s;
+ memset(&s, 0, sizeof(kstring_t));
+ kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s);
+ kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s);
+ kputs(b->info, &s); if (b->info[0]) kputc(';', &s); kputc('\0', &s);
+ kputs(b->fmt, &s); kputc('\0', &s);
+ free(b->str);
+ b->m_str = s.m; b->l_str = s.l; b->str = s.s;
+ b->qual = -4.343*(log(1-exp(max_lk-lk_sum)));
+ if ( b->qual>999 ) b->qual = 999;
+ //bcf_sync(b);
+
+ int x, old_n_gi = b->n_gi;
+ s.m = b->m_str; s.l = b->l_str - 1; s.s = b->str;
+ kputs(":GT:GQ", &s); kputc('\0', &s);
+ b->m_str = s.m; b->l_str = s.l; b->str = s.s;
+ bcf_sync(b);
+
+ // Call GT
+ int isample, gts=0;
+ for (isample = 0; isample < b->n_smpl; isample++)
+ {
+ int ploidy = b->ploidy ? b->ploidy[isample] : 2;
+ float *p = pdg + isample*npdg;
+ int ia, als = 0;
+ float lk = INT_MIN, lk_sum=0;
+ for (ia=0; ia<nals; ia++)
+ {
+ if ( !(max_als&1<<ia) ) continue;
+ int iaa = (ia+1)*(ia+2)/2-1;
+ float _lk = p[iaa]*qsum[ia]*qsum[ia];
+ if ( _lk > lk ) { lk = _lk; als = ia<<3 | ia; }
+ lk_sum += _lk;
+ }
+ if ( ploidy==2 )
+ {
+ for (ia=0; ia<nals; ia++)
+ {
+ if ( !(max_als&1<<ia) ) continue;
+ int iaa = (ia+1)*(ia+2)/2-1;
+ for (ib=0; ib<ia; ib++)
+ {
+ if ( !(max_als&1<<ib) ) continue;
+ int iab = iaa - ia + ib;
+ float _lk = 2*qsum[ia]*qsum[ib]*p[iab];
+ if ( _lk > lk ) { lk = _lk; als = ib<<3 | ia; }
+ lk_sum += _lk;
+ }
+ }
+ }
+ lk = -log(1-lk/lk_sum)/0.2302585;
+ if ( idp>=0 && ((uint16_t*)b->gi[idp].data)[isample]==0 )
+ {
+ als |= 1<<7;
+ lk = 0;
+ }
+ ((uint8_t*)b->gi[old_n_gi].data)[isample] = als;
+ ((uint8_t*)b->gi[old_n_gi+1].data)[isample] = lk<100 ? (int)lk : 99;
+
+ gts |= (als>>3&7) | (als&7);
+ }
+ bcf_fit_alt(b,max_als);
+ bcf_sync(b);
+
+ free(pdg);
+ return gts;
+}
+
+static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
+{
+ int i, j;
+ long *p, tmp;
+ p = alloca(b->n_alleles * sizeof(long));
+ memset(p, 0, sizeof(long) * b->n_alleles);
+ for (j = 0; j < ma->n; ++j) {
+ const uint8_t *pi = ma->PL + j * ma->PL_len;
+ double *pdg = ma->pdg + j * 3;
+ pdg[0] = ma->q2p[pi[2]]; pdg[1] = ma->q2p[pi[1]]; pdg[2] = ma->q2p[pi[0]];
+ for (i = 0; i < b->n_alleles; ++i)
+ p[i] += (int)pi[(i+1)*(i+2)/2-1];
+ }
+ for (i = 0; i < b->n_alleles; ++i) p[i] = p[i]<<4 | i;
+ for (i = 1; i < b->n_alleles; ++i) // insertion sort
+ for (j = i; j > 0 && p[j] < p[j-1]; --j)
+ tmp = p[j], p[j] = p[j-1], p[j-1] = tmp;
+ for (i = b->n_alleles - 1; i >= 0; --i)
+ if ((p[i]&0xf) == 0) break;
+ return i;
}
+
int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
{
double sum, g[3];
void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta);
void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta);
void bcf_p1_destroy(bcf_p1aux_t *ma);
+ void bcf_p1_set_ploidy(bcf1_t *b, bcf_p1aux_t *ma);
int bcf_p1_cal(const bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst);
+ int call_multiallelic_gt(bcf1_t *b, bcf_p1aux_t *ma, double threshold);
int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k);
void bcf_p1_dump_afs(bcf_p1aux_t *ma);
int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn);