3 Markov chain Monte Carlo on trees (Bayesian phylogenetic analysis)
\r
5 Ziheng YANG, since December 2002
\r
7 cc -o mcmctree -O3 mcmctree.c tools.c -lm
\r
8 cc -o infinitesites -DINFINITESITES -O3 mcmctree.c tools.c -lm
\r
9 cl -O2 -W3 mcmctree.c tools.c
\r
10 cl -FeInfiniteSites.exe -DINFINITESITES -W3 -D_CRT_SECURE_NO_DEPRECATE -O2 mcmctree.c tools.c
\r
12 mcmctree <ControlFileName>
\r
14 InfiniteSites <ControlFileName>
\r
15 FixedDsClock1.txt or FixedDsClock23.txt are hard-coded file names
\r
19 #define INFINITESITES
\r
25 #define NBRANCH (NS*2-2)
\r
26 #define NNODE (NS*2-1)
\r
28 #define NGENE 8000 /* used for gnodes[NGENE] */
\r
29 #define NMORPHLOCI 10 /* used for com.zmorph[] */
\r
33 #define MaxNFossils 200
\r
36 double (*rndSymmetrical)(void);
\r
39 extern int noisy, NFunCall;
\r
40 extern char BASEs[];
\r
41 extern double PjumpOptimum;
\r
43 int GetOptions(char *ctlf);
\r
44 int ReadTreeSeqs(FILE*fout);
\r
45 int ProcessFossilInfo();
\r
46 int ReadBlengthGH (char infile[]);
\r
47 int GenerateBlengthGH (char infile[]);
\r
50 int UseLocus(int locus, int copycondP, int setmodel, int setSeqName);
\r
51 int AcceptRejectLocus(int locus, int accept);
\r
52 void switchconPin(void);
\r
53 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double xcom[]);
\r
54 int DownSptreeSetTime(int inode);
\r
55 void getSinvDetS(double space[]);
\r
56 int GetInitials(void);
\r
57 int GenerateGtree(int locus);
\r
58 int printGtree(int printBlength);
\r
59 int SetParameters(double x[]);
\r
60 int ConditionalPNode(int inode, int igene, double x[]);
\r
61 double lnpData(double lnpDi[]);
\r
62 double lnpD_locus(int locus);
\r
63 double lnpD_locus_Approx(int locus);
\r
64 double lnptNCgiventC(void);
\r
66 double lnptCalibrationDensity(double t, int fossil, double p[7]);
\r
67 int SetupPriorTimesFossilErrors(void);
\r
68 double lnpriorTimesBDS_Approach1(void);
\r
69 double lnpriorTimesTipDate(void);
\r
70 double lnpriorTimes(void);
\r
71 double lnpriorRates(void);
\r
72 double logPriorRatioGamma(double xnew, double xold, double a, double b);
\r
73 void copySptree(void);
\r
74 void printSptree(void);
\r
75 double InfinitesitesClock(double *FixedDs);
\r
76 double Infinitesites(FILE *fout);
\r
77 int collectx (FILE* fout, double x[]);
\r
78 int MCMC(FILE* fout);
\r
79 int LabelOldCondP(int spnode);
\r
80 double UpdateTimes(double *lnL, double finetune);
\r
81 double UpdateTimesClock23(double *lnL, double finetune);
\r
82 double UpdateRates(double *lnL, double finetune);
\r
83 double UpdateParameters(double *lnL, double finetune);
\r
84 double UpdateParaRates(double *lnL, double finetune, double space[]);
\r
85 double mixing(double *lnL, double finetune);
\r
86 double UpdatePFossilErrors(double finetune);
\r
87 int getPfossilerr (double postEFossil[], double nround);
\r
88 int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin);
\r
91 unsigned char *z[NS];
\r
92 char *spname[NS], seqf[512],outf[512],treef[512],daafile[512],mcmcf[512],inBVf[512];
\r
93 char oldconP[NNODE]; /* update conP for node? (0 yes; 1 no) */
\r
94 int seqtype, ns, ls, ngene, posG[2],lgene[1], *pose, npatt, readpattern;
\r
95 int np, ncode, ntime, nrate, nrgene, nalpha, npi, ncatG, print;
\r
96 int cleandata, ndata;
\r
97 int model, clock, fix_kappa, fix_alpha, fix_rgene, Mgene;
\r
98 int method, icode, codonf, aaDist, NSsites;
\r
99 double *fpatt, kappa, alpha, TipDate, TipDate_TimeUnit;
\r
100 double rgene[NGENE],piG[NGENE][NCODE]; /* not used */
\r
101 double (*plfun)(double x[],int np), freqK[NCATG], rK[NCATG], *conP, *fhK;
\r
103 int curconP; /* curconP = 0 or 1 */
\r
105 double *conPin[2], space[100000]; /* change space[] to dynamic memory? */
\r
106 int conPSiteClass, NnodeScale;
\r
107 char *nodeScale; /* nScale[ns-1] for interior nodes */
\r
108 double *nodeScaleF; /* nScaleF[npatt] for scale factors */
\r
112 int nbranch, nnode, root, branches[NBRANCH][2];
\r
114 struct TREEN { /* ipop is node number in species tree */
\r
115 int father, nson, sons[2], ibranch, ipop;
\r
116 double branch, age, label, *conP;
\r
117 char *nodeStr, fossil;
\r
118 } *nodes, **gnodes, nodes_t[2*NS-1];
\r
120 /* nodes_t[] is working space. nodes is a pointer and moves around.
\r
121 gnodes[] holds the gene trees, subtrees constructed from the master species
\r
122 tree. Each locus has a full set of rates (rates) for all branches on the
\r
123 master tree, held in sptree.nodes[].rates. Branch lengths in the gene
\r
124 tree are calculated by using those rates and the divergence times.
\r
126 gnodes[][].label in the gene tree is used to store branch lengths estimated
\r
127 under no clock when mcmc.usedata=2 (normal approximation to likelihood).
\r
131 struct SPECIESTREE {
\r
132 int nbranch, nnode, root, nspecies, nfossil;
\r
135 char name[LSPNAME+1], fossil, usefossil; /* fossil: 0, 1(L), 2(U), 3(B), 4(G) */
\r
136 int father, nson, sons[2];
\r
137 double age, pfossil[7]; /* parameters in fossil distribution */
\r
138 double *rates; /* log rates for loci */
\r
141 /* all trees are binary & rooted, with ancestors unknown. */
\r
144 struct DATA { /* locus-specific data and tree information */
\r
145 int ns[NGENE], ls[NGENE], npatt[NGENE], ngene, nmorphloci, lgene[NGENE];
\r
146 int root[NGENE+1], conP_offset[NGENE];
\r
147 int priortime, priorrate;
\r
148 char datatype[NGENE], *z[NGENE][NS], cleandata[NGENE];
\r
149 double *zmorph[NMORPHLOCI][NS*2-1], *Rmorph[NMORPHLOCI];
\r
150 double *fpatt[NGENE], lnpT, lnpR, lnpDi[NGENE], pi[NGENE][NCODE];
\r
151 double rgene[NGENE], kappa[NGENE], alpha[NGENE];
\r
152 double BDS[4]; /* parameters in the birth-death-sampling model */
\r
153 double kappagamma[2], alphagamma[2], rgenegD[3], sigma2gD[3];
\r
154 double pfossilerror[3], /* (p_beta, q_beta, NminCorrect) */ Pfossilerr, *CcomFossilErr;
\r
155 double sigma2[NGENE]; /* sigma2[g] are the variances */
\r
156 double *blMLE[NGENE], *Gradient[NGENE], *Hessian[NGENE];
\r
160 struct MCMCPARAMETERS {
\r
161 int resetFinetune, burnin, nsample, sampfreq, usedata, saveconP, print;
\r
162 double finetune[6];
\r
163 } mcmc; /* control parameters */
\r
166 char *models[]={"JC69", "K80", "F81", "F84", "HKY85", "T92", "TN93", "REV"};
\r
167 enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV} MODELS;
\r
168 enum {BASE, AA, CODON, MORPHC} DATATYPE;
\r
171 double PMat[16], Cijk[64], Root[4];
\r
172 double _rateSite=1, OldAge=999;
\r
173 int debug=0, LASTROUND=0, BayesEB, testlnL=0, NPMat=0; /* no use for this */
\r
175 /* for sptree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
\r
176 enum {LOWER_F=1, UPPER_F, BOUND_F, GAMMA_F, SKEWN_F, SKEWT_F, S2N_F} FOSSIL_FLAGS;
\r
177 char *fossils[]={" ", "L", "U", "B", "G", "SN", "ST", "S2N"};
\r
178 int npfossils[]={ 0, 4, 2, 4, 2, 3, 4, 7};
\r
179 char *clockstr[]={"", "Global clock", "Independent rates", "Autocorrelated rates"};
\r
180 enum {SQRT_B=1, LOG_B, ARCSIN_B} B_Transforms;
\r
181 char *Btransform[]={"", "square root", "logarithm", "arcsin"};
\r
184 #include "treesub.c"
\r
187 int main (int argc, char *argv[])
\r
189 char ctlf[512] = "mcmctree.ctl";
\r
194 printf("Select the proposal kernel\n");
\r
195 printf(" 0: uniform\n 1: Triangle\n 2: Laplace\n 3: Normal\n 4: Bactrian\n 5: BactrianTriangle\n 6: BactrianLaplace\n");
\r
198 if(k==0) { rndSymmetrical = rnduM0V1; PjumpOptimum=0.4; }
\r
199 if(k==1) { rndSymmetrical = rndTriangle; PjumpOptimum=0.4; }
\r
200 if(k==2) { rndSymmetrical = rndLaplace; PjumpOptimum=0.4; }
\r
201 if(k==3) { rndSymmetrical = rndNormal; PjumpOptimum=0.4; }
\r
202 if(k==4) { rndSymmetrical = rndBactrian; PjumpOptimum=0.3; }
\r
203 if(k==5) { rndSymmetrical = rndBactrianTriangle; PjumpOptimum=0.3; }
\r
204 if(k==6) { rndSymmetrical = rndBactrianLaplace; PjumpOptimum=0.3; }
\r
206 data.priortime = 0; /* 0: BDS; 1: beta */
\r
207 data.priorrate = 0; /* 0: LogNormal; 1: gamma */
\r
210 com.alpha=0.; com.ncatG=1;
\r
211 com.ncode=4; com.clock=1;
\r
213 printf("MCMCTREE in %s\n", pamlVerStr);
\r
215 strncpy(ctlf, argv[1], 127);
\r
217 data.BDS[0]=1; data.BDS[1]=1; data.BDS[2]=0;
\r
218 strcpy(com.outf, "out");
\r
219 strcpy(com.mcmcf, "mcmc.txt");
\r
224 fout = gfopen(com.outf,"w");
\r
225 fprintf(fout, "MCMCTREE (%s) %s\n", pamlVerStr, com.seqf);
\r
227 ReadTreeSeqs(fout);
\r
228 if(data.pfossilerror && (data.pfossilerror[2]<0 || data.pfossilerror[2]>sptree.nfossil))
\r
229 error2("nMinCorrect for fossil errors is out of range.");
\r
231 if(mcmc.usedata==1) {
\r
232 if(com.seqtype!=0) error2("usedata = 1 for nucleotides only");
\r
236 if (com.ncatG<2 || com.ncatG>NCATG) error2("ncatG");
\r
237 com.plfun = lfundG;
\r
239 if (com.model>HKY85) error2("Only HKY or simpler models are allowed.");
\r
240 if (com.model==JC69 || com.model==F81) { com.fix_kappa=1; com.kappa=1; }
\r
242 else if (mcmc.usedata==2) {
\r
245 if(com.seqtype==1) com.ncode = 61;
\r
246 if(com.seqtype==2) com.ncode = 20;
\r
248 else if(mcmc.usedata==3) {
\r
249 GenerateBlengthGH("out.BV"); /* this is used so that the in.BV is not overwritten */
\r
253 /* Do we want RootAge constraint at the root if (com.clock==1)? */
\r
254 if(com.clock!=1 && sptree.RootAge[1]<=0 && sptree.nodes[sptree.root].fossil<=LOWER_F)
\r
255 error2("set RootAge in control file when there is no upper bound on root");
\r
256 if(data.pfossilerror[0]==0 && !sptree.nodes[sptree.root].fossil) {
\r
257 if(sptree.RootAge[1] <= 0)
\r
258 error2("set RootAge in control file when there is no upper bound on root");
\r
260 sptree.nodes[sptree.root].fossil = (sptree.RootAge[0]>0 ? BOUND_F : UPPER_F);
\r
261 for(i=0; i<4; i++)
\r
262 sptree.nodes[sptree.root].pfossil[i] = sptree.RootAge[i];
\r
264 if( com.TipDate_TimeUnit==0)
\r
265 printf("\nFossil calibration information used.\n");
\r
266 for(i=sptree.nspecies; i<sptree.nspecies*2-1; i++) {
\r
267 if((k=sptree.nodes[i].fossil) == 0) continue;
\r
268 printf("Node %3d: %3s ( ", i+1, fossils[k]);
\r
269 for(j=0; j<npfossils[k]; j++) {
\r
270 printf("%6.4f", sptree.nodes[i].pfossil[j + (k==UPPER_F)]);
\r
271 printf("%s", (j==npfossils[k]-1 ? " )\n" : ", "));
\r
275 #if(defined INFINITESITES)
\r
276 Infinitesites(fout);
\r
280 fputs("\nSpecies tree for FigTree.", fout);
\r
282 FPN(fout); OutTreeN(fout,1,PrNodeNum); FPN(fout);
\r
283 DescriptiveStatisticsSimpleMCMCTREE(fout, com.mcmcf, 1);
\r
295 /* This allocates memory for conditional probabilities (conP).
\r
296 gnodes[locus] is not allocated here but in GetGtree().
\r
298 Conditional probabilities for internal nodes are com.conPin[2], allocated
\r
299 according to data.ns[locus] and data.npatt[locus] at all loci. Two copies
\r
300 of the space are allocated, hence the [2]. The copy used for the current
\r
301 gene trees is com.conPin[com.curconP] while the copy for proposed gene trees
\r
302 is com.conPin[!com.curconP]. data.conP_offset[locus] marks the starting
\r
303 position in conPin[] for each locus.
\r
305 Memory arrangement if(com.conPSiteClass=1):
\r
306 ncode*npatt for each node, by node, by iclass, by locus
\r
308 int locus,j,k, s1,sG=1, sfhK=0, g=data.ngene;
\r
309 double *conP, *rates;
\r
311 /* get mem for conP (internal nodes) */
\r
312 if(mcmc.usedata==1) {
\r
313 if(!com.fix_alpha && mcmc.saveconP) {
\r
314 com.conPSiteClass=1; sG=com.ncatG;
\r
316 data.conP_offset[0] = 0;
\r
317 for(locus=0,com.sconP=0; locus<g; locus++) {
\r
318 s1= com.ncode * data.npatt[locus];
\r
319 com.sconP += sG*s1*(data.ns[locus]-1)*sizeof(double);
\r
321 data.conP_offset[locus+1] = data.conP_offset[locus] + sG*s1*(data.ns[locus]-1);
\r
324 if((com.conPin[0]=(double*)malloc(2*com.sconP))==NULL)
\r
325 error2("oom conP");
\r
327 com.conPin[1] = com.conPin[0] + com.sconP/sizeof(double);
\r
328 printf("\n%u bytes for conP\n", 2*com.sconP);
\r
330 /* set gnodes[locus][].conP for tips and internal nodes */
\r
332 for(locus=0; locus<g; locus++) {
\r
333 conP = com.conPin[0] + data.conP_offset[locus];
\r
334 for(j=data.ns[locus]; j<data.ns[locus]*2-1; j++,conP+=com.ncode*data.npatt[locus])
\r
335 gnodes[locus][j].conP = conP;
\r
336 if(!data.cleandata[locus]) {
\r
337 /* Is this call to UseLocus still needed? Ziheng 28/12/2009 */
\r
338 UseLocus(locus, 0, mcmc.usedata, 0);
\r
342 if(!com.fix_alpha) {
\r
343 for(locus=0; locus<g; locus++)
\r
344 sfhK = max2(sfhK, data.npatt[locus]);
\r
345 sfhK *= com.ncatG*sizeof(double);
\r
346 if((com.fhK=(double*)realloc(com.fhK,sfhK))==NULL) error2("oom");
\r
350 else if(mcmc.usedata==2) { /* allocate data.Gradient & data.Hessian */
\r
351 for(locus=0,k=0; locus<data.ngene; locus++)
\r
352 k += (2*data.ns[locus]-1)*(2*data.ns[locus]-1+2);
\r
353 if((com.conPin[0]=(double*)malloc(k*sizeof(double)))==NULL)
\r
354 error2("oom g & H");
\r
355 for(j=0; j<k; j++) com.conPin[0][j]=-1;
\r
356 for(locus=0,j=0; locus<data.ngene; locus++) {
\r
357 data.blMLE[locus] = com.conPin[0]+j;
\r
358 data.Gradient[locus] = com.conPin[0]+j+(2*data.ns[locus]-1);
\r
359 data.Hessian[locus] = com.conPin[0]+j+(2*data.ns[locus]-1)*2;
\r
360 j += (2*data.ns[locus]-1)*(2*data.ns[locus]-1+2);
\r
364 if(com.clock>1) { /* space for rates */
\r
365 s1 = (sptree.nspecies*2-1)*g*sizeof(double);
\r
366 if(noisy) printf("%d bytes for rates.\n", s1);
\r
367 if((rates=(double*)malloc(s1))==NULL) error2("oom for rates");
\r
368 for(j=0; j<sptree.nspecies*2-1; j++)
\r
369 sptree.nodes[j].rates = rates+g*j;
\r
374 void FreeMem (void)
\r
378 for(locus=0; locus<data.ngene; locus++)
\r
379 free(gnodes[locus]);
\r
382 free(com.conPin[0]);
\r
383 if(mcmc.usedata==1) {
\r
384 for(locus=0; locus<data.ngene; locus++) {
\r
385 free(data.fpatt[locus]);
\r
386 for(j=0;j<data.ns[locus]; j++)
\r
387 free(data.z[locus][j]);
\r
391 free(sptree.nodes[0].rates);
\r
393 if(mcmc.usedata==1 && com.alpha)
\r
396 for(j=0; j<data.nmorphloci; j++){
\r
397 free(data.zmorph[j][0]);
\r
398 free(data.Rmorph[j]);
\r
403 int UseLocus (int locus, int copyconP, int setModel, int setSeqName)
\r
406 This point nodes to the gene tree at locus gnodes[locus] and set com.z[]
\r
407 etc. for likelihood calculation for the locus. Note that the gene tree
\r
408 topology (gnodes[]) is never copied, but nodes[].conP are repositioned in the
\r
409 algorithm. The pointer for root gnodes[][com.ns].conP is assumed to be the
\r
410 start of the whole block for the locus.
\r
411 If (copyconP && mcmc.useData), the conP for internal nodes point
\r
412 to a fixed place (indicated by data.conP_offset[locus]) in the alternative
\r
413 space com.conPin[!com.curconP]. Note that the conP for each locus uses the
\r
414 correct space so that this routine can be used by all the proposal steps,
\r
415 some of which operates on one locus and some change all loci.
\r
417 Try to replace this with UseLocus() for B&C.
\r
419 int i, s1 = com.ncode*data.npatt[locus], sG = (com.conPSiteClass?com.ncatG:1);
\r
420 double *conPt=com.conPin[!com.curconP]+data.conP_offset[locus];
\r
422 com.ns = data.ns[locus];
\r
423 com.ls = data.ls[locus];
\r
424 tree.root = data.root[locus];
\r
425 tree.nnode = 2*com.ns-1;
\r
426 nodes = gnodes[locus];
\r
427 if(copyconP && mcmc.usedata==1) { /* this preserves the old conP. */
\r
428 memcpy(conPt, gnodes[locus][com.ns].conP, sG*s1*(com.ns-1)*sizeof(double));
\r
429 for(i=com.ns; i<tree.nnode; i++)
\r
430 nodes[i].conP = conPt+(i-com.ns)*s1;
\r
433 if(setModel && mcmc.usedata==1) {
\r
434 com.cleandata = data.cleandata[locus];
\r
435 for(i=0; i<com.ns; i++)
\r
436 com.z[i] = data.z[locus][i];
\r
437 com.npatt = com.posG[1] = data.npatt[locus];
\r
439 com.fpatt = data.fpatt[locus];
\r
441 /* The following is model-dependent */
\r
442 if(data.datatype[locus]==BASE) {
\r
443 com.kappa = data.kappa[locus];
\r
444 com.alpha = data.alpha[locus];
\r
446 xtoy(data.pi[locus], com.pi, com.ncode);
\r
447 if(com.model<=TN93)
\r
448 eigenTN93(com.model, com.kappa, com.kappa, com.pi, &nR, Root, Cijk);
\r
451 DiscreteGamma (com.freqK,com.rK,com.alpha,com.alpha,com.ncatG,DGammaUseMedian);
\r
454 com.NnodeScale = data.NnodeScale[locus];
\r
455 com.nodeScale = data.nodeScale[locus];
\r
456 nS = com.NnodeScale*com.npatt * (com.conPSiteClass?com.ncatG:1);
\r
457 for(i=0; i<nS; i++)
\r
458 com.nodeScaleF[i]=0;
\r
462 for(i=0;i<com.ns;i++) com.spname[i] = sptree.nodes[nodes[i].ipop].name;
\r
468 int AcceptRejectLocus (int locus, int accept)
\r
470 /* This accepts or rejects the proposal at one locus.
\r
471 This works for proposals that change one locus only.
\r
472 After UseLocus(), gnodes[locus][ns].conP points to the alternative
\r
473 conP space. If the proposal is accepted, this copies the newly calculated
\r
474 conP into gnodes[locus][ns].conP. In either case, gnodes[].conP is
\r
476 Proposals that change all loci use switchconP() to accept the proposal.
\r
478 int i, ns=data.ns[locus], s1=com.ncode*data.npatt[locus], sG=1;
\r
479 double *conP=com.conPin[com.curconP]+data.conP_offset[locus];
\r
481 if(mcmc.usedata==1) {
\r
482 if(com.conPSiteClass) sG=com.ncatG;
\r
484 memcpy(conP, gnodes[locus][ns].conP, sG*s1*(ns-1)*sizeof(double));
\r
485 for(i=ns; i<ns*2-1; i++)
\r
486 gnodes[locus][i].conP = conP+(i-ns)*s1;
\r
491 void switchconPin(void)
\r
493 /* This reposition pointers gnodes[locus].conP to the alternative com.conPin,
\r
494 to avoid recalculation of conditional probabilities, when a proposal is
\r
495 accepted in steps that change all loci in one go, such as UpdateTimes()
\r
496 and UpdateParameters().
\r
497 Note that for site-class models (com.conPSiteClass), gnodes[].conP points
\r
498 to the space for class 0, and the space for class 1 starts (ns-1)*ncode*npatt
\r
499 later. Such repositioning for site classes is achieved in fx_r().
\r
504 com.curconP =! com.curconP;
\r
506 for(locus=0; locus<data.ngene; locus++) {
\r
507 conP = com.conPin[com.curconP] + data.conP_offset[locus];
\r
508 for(i=data.ns[locus]; i<data.ns[locus]*2-1; i++,conP+=com.ncode*data.npatt[locus])
\r
509 gnodes[locus][i].conP = conP;
\r
513 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double xcom[])
\r
515 /* This is not used. */
\r
516 if(com.ngene!=1) error2("com.ngene==1?");
\r
521 int SetParameters (double x[])
\r
523 /* This is not used. */
\r
527 int GetPMatBranch2 (double PMat[], double t)
\r
529 /* This calculates the transition probability matrix.
\r
531 double Qrates[2], T,C,A,G,Y,R, mr;
\r
534 Qrates[0]=Qrates[1]=com.kappa;
\r
535 if(com.seqtype==0) {
\r
536 if (com.model<=K80)
\r
537 PMatK80(PMat, t, com.kappa);
\r
538 else if(com.model<=TN93) {
\r
539 T=com.pi[0]; C=com.pi[1]; A=com.pi[2]; G=com.pi[3]; Y=T+C; R=A+G;
\r
540 if (com.model==F84) {
\r
541 Qrates[0]=1+com.kappa/Y; /* kappa1 */
\r
542 Qrates[1]=1+com.kappa/R; /* kappa2 */
\r
544 else if (com.model<=HKY85) Qrates[1]=Qrates[0];
\r
545 mr=1/(2*T*C*Qrates[0] + 2*A*G*Qrates[1] + 2*Y*R);
\r
546 PMatTN93(PMat, t*mr*Qrates[0], t*mr*Qrates[1], t*mr, com.pi);
\r
553 int ConditionalPNode (int inode, int igene, double x[])
\r
555 int n=com.ncode, i,j,k,h, ison;
\r
558 for(i=0; i<nodes[inode].nson; i++) {
\r
559 ison = nodes[inode].sons[i];
\r
560 if (nodes[ison].nson>0 && !com.oldconP[ison])
\r
561 ConditionalPNode(ison, igene, x);
\r
563 for(i=0;i<com.npatt*n;i++) nodes[inode].conP[i] = 1;
\r
565 for(i=0; i<nodes[inode].nson; i++) {
\r
566 ison = nodes[inode].sons[i];
\r
568 t = nodes[ison].branch*_rateSite;
\r
570 printf("\nt =%12.6f ratesite = %.6f", nodes[ison].branch, _rateSite);
\r
571 error2("blength < 0");
\r
574 GetPMatBranch2(PMat, t);
\r
576 if (nodes[ison].nson<1 && com.cleandata) { /* tip && clean */
\r
577 for(h=0; h<com.npatt; h++)
\r
579 nodes[inode].conP[h*n+j] *= PMat[j*n+com.z[ison][h]];
\r
581 else if (nodes[ison].nson<1 && !com.cleandata) { /* tip & unclean */
\r
582 for(h=0; h<com.npatt; h++)
\r
583 for(j=0; j<n; j++) {
\r
584 for(k=0,t=0; k<nChara[com.z[ison][h]]; k++)
\r
585 t += PMat[j*n+CharaMap[com.z[ison][h]][k]];
\r
586 nodes[inode].conP[h*n+j] *= t;
\r
590 for(h=0; h<com.npatt; h++)
\r
591 for(j=0; j<n; j++) {
\r
592 for(k=0,t=0; k<n; k++)
\r
593 t += PMat[j*n+k]*nodes[ison].conP[h*n+k];
\r
594 nodes[inode].conP[h*n+j] *= t;
\r
600 /* node scaling. Is this coded? Please check. */
\r
601 if(com.NnodeScale && com.nodeScale[inode])
\r
602 NodeScale(inode, 0, com.npatt);
\r
606 double lnLmorphF73 (int inode, int locus, double *vp)
\r
608 /* This calculates the lnL for a morphological locus.
\r
609 data.zmorph[locus][] has the morphological measurements.
\r
610 data.Rmorph[locus][i*com.ls+j] is correlation between characters i and j.
\r
611 data.rgene[locus] is the locus rate or mu_locus for sequence data.
\r
613 int s=com.ns, j, h, *sons=nodes[inode].sons;
\r
614 double v[2], x[2], zz, vv, y, t, nu, lnL=0;
\r
617 for(j=0; j<2; j++) {
\r
618 if(nodes[sons[j]].nson)
\r
619 lnL += lnLmorphF73(sons[j], locus, &v[j]);
\r
621 t = nodes[inode].age - nodes[sons[j]].age;
\r
622 nu = (com.clock==1 ? data.rgene[locus] : sptree.nodes[sons[j]].rates[locus]);
\r
627 *vp = nodes[inode].branch + v[0]*v[1]/vv;
\r
628 for(h=0,zz=0; h<com.ls; h++) {
\r
629 for(j=0; j<2; j++) x[j] = data.zmorph[locus][sons[j]][h];
\r
630 data.zmorph[locus][inode][h] = (v[0]*x[1] + v[1]*x[0])/vv;
\r
631 zz += square(x[0] - x[1]);
\r
633 lnL += y = -0.5*com.ls*log(2*Pi*vv) - zz/(2*vv);
\r
635 printf("\nnode %2d sons %2d %2d v %6.4f %7.4f vp %7.4f, x %7.4f lnL = %7.4f %8.4f",
\r
636 inode+1, sons[0]+1, sons[1]+1, v[0],v[1], *vp, data.zmorph[locus][inode][0], y, lnL);
\r
638 if(inode==data.root[locus]) exit(0);
\r
643 double lnpD_locus (int locus)
\r
645 /* This calculates ln{Di|Gi, Bi} using times in sptree.nodes[].age and rates.
\r
646 Rates are in data.rgene[] if (clock==1) and in sptree.nodes[].rates if
\r
647 (clock==0). Branch lengths in the gene tree, nodes[].branch, are calculated
\r
648 in this routine but they may not be up-to-date before calling this routine.
\r
649 UseLocus() is called before this routine.
\r
652 double lnL=0, b, t;
\r
654 if(mcmc.usedata==0) return(0);
\r
655 for(i=0; i<tree.nnode; i++) /* age in gene tree */
\r
656 nodes[i].age = sptree.nodes[nodes[i].ipop].age;
\r
657 if(com.clock==1) { /* global clock */
\r
658 for(i=0; i<tree.nnode; i++) {
\r
659 if(i==tree.root) continue;
\r
660 nodes[i].branch = (nodes[nodes[i].father].age - nodes[i].age) * data.rgene[locus];
\r
661 if(nodes[i].branch < 0)
\r
662 error2("blength < 0");
\r
665 else { /* independent & correlated rates */
\r
666 for(i=0; i<tree.nnode; i++) {
\r
667 if(i==tree.root) continue;
\r
668 for(j=nodes[i].ipop,b=0; j!=nodes[nodes[i].father].ipop; j=dad) {
\r
669 dad = sptree.nodes[j].father;
\r
670 t = sptree.nodes[dad].age - sptree.nodes[j].age;
\r
671 b += t * sptree.nodes[j].rates[locus];
\r
673 nodes[i].branch = b;
\r
676 if(mcmc.usedata==1 && data.datatype[locus]==MORPHC)
\r
677 lnL = lnLmorphF73(data.root[locus], locus, &t);
\r
678 else if(mcmc.usedata==1)
\r
679 lnL = -com.plfun(NULL, -1);
\r
680 else if(mcmc.usedata==2)
\r
681 lnL = lnpD_locus_Approx(locus);
\r
686 double lnpData (double lnpDi[])
\r
688 /* This calculates the log likelihood, the log of the probability of the data
\r
689 given gtree[] for each locus.
\r
690 This updates gnodes[locus][].conP for every node.
\r
696 for(j=0; j<sptree.nspecies*2-1; j++)
\r
697 com.oldconP[j] = 0;
\r
698 for(locus=0; locus<data.ngene; locus++) {
\r
699 UseLocus(locus,0, mcmc.usedata, 1);
\r
700 y = lnpD_locus(locus);
\r
702 if(testlnL && fabs(lnpDi[locus]-y)>1e-5)
\r
703 printf("\tlnLi %.6f != %.6f at locus %d\n", lnpDi[locus], y, locus+1);
\r
711 double lnpD_locus_Approx (int locus)
\r
713 /* This calculates the likelihood using the normal approxiamtion (Thorne et al.
\r
714 1998). The branch lengths on the unrooted tree estimated without clock have
\r
715 been read into nodes[][].label and the gradient and Hessian are in data.Gradient[]
\r
716 & data.Hessian[]. The branch lengths predicted from the rate-evolution
\r
717 model (that is, products of rates and times) are in nodes[].branch.
\r
718 The tree is rooted, and the two branch lengths around the root are summed up
\r
719 and treated as one branch length, compared with the MLE, which is stored as
\r
720 the branch length to the first son of the root.
\r
721 This is different from funSS_AHRS(), which has the data branch lengths in
\r
722 nodes[].branch and calculate the predicted likelihood by multiplying nodes[].age
\r
723 and nodes[].label (which holds the rates). Think about a redesign to avoid confusion.
\r
725 int debug=0, i,j, ib, nb=tree.nnode-1-1; /* don't use tree.nbranch, which is not up to date. */
\r
726 int son1=nodes[tree.root].sons[0], son2=nodes[tree.root].sons[1];
\r
727 double lnL=0, z[NS*2-1], *g=data.Gradient[locus], *H=data.Hessian[locus], cJC=(com.ncode-1.0)/com.ncode;
\r
728 double bTlog=1e-5, elog=0.1, e; /* change the same line in ReadBlengthGH() as well */
\r
730 /* construct branch lengths */
\r
731 for(j=0; j<tree.nnode; j++) {
\r
732 if(j==tree.root || j==son2) continue;
\r
733 ib = nodes[j].ibranch;
\r
735 z[ib] = nodes[son1].branch + nodes[son2].branch;
\r
737 z[ib] = nodes[j].branch;
\r
739 if(data.transform==LOG_B) { /* logarithm transform, MLE not transformed */
\r
740 e = (data.blMLE[locus][ib] < bTlog ? elog : 0);
\r
741 z[ib] = log( (z[ib] + e)/(data.blMLE[locus][ib] + e) );
\r
744 if(data.transform==SQRT_B) /* sqrt transform, MLE transformed */
\r
745 z[ib] = sqrt(z[ib]);
\r
746 else if(data.transform==ARCSIN_B) /* arcsin transform, MLE transformed */
\r
747 z[ib] = 2*asin(sqrt( cJC - cJC*exp(-z[ib]/cJC) ));
\r
748 z[ib] -= data.blMLE[locus][ib];
\r
754 OutTreeN(F0,1,1); FPN(F0);
\r
755 matout(F0, z, 1, nb);
\r
756 matout(F0, data.blMLE[locus], 1, nb);
\r
759 for(i=0; i<nb; i++)
\r
761 for(i=0; i<nb; i++) {
\r
762 lnL += z[i]*z[i]*H[i*nb+i]/2;
\r
764 lnL += z[i]*z[j]*H[i*nb+j];
\r
770 int ReadBlengthGH (char infile[])
\r
772 /* this reads the MLEs of branch lengths under no clock and their SEs, for
\r
773 approximate calculation of sequence likelihood. The MLEs of branch lengths
\r
774 are stored in data.blMLE[locus] as well as nodes[].label, and the gradient
\r
775 and Hessian in data.Gradient[locsu][] and data.Hessian[locus][].
\r
776 The branch length (sum of 2 branch lengths) around root in rooted tree is
\r
777 placed on 1st son of root in rooted tree.
\r
778 For the log transform, data.blMLE[] holds the MLEs of branch lengths, while for
\r
779 other transforms (sqrt, jc), the transformed values are stored.
\r
781 This also frees up memory for sequences.
\r
783 FILE* fBGH = gfopen(infile,"r");
\r
785 int locus, i, j, nb, son1, son2, leftsingle;
\r
786 double small = 1e-20;
\r
787 double dbu[NBRANCH], dbu2[NBRANCH], u, cJC=(com.ncode-1.0)/com.ncode, sin2u, cos2u;
\r
788 double bTlog=1e-5, elog=0.1, e; /* change the line in lnpD_locus_Approx() as well */
\r
789 int debug = 0, longbranches=0;
\r
791 if(noisy) printf("\nReading branch lengths, Gradient & Hessian from %s.\n", infile);
\r
793 for(locus=0; locus<data.ngene; locus++) {
\r
794 UseLocus(locus, 0, 0, 1);
\r
795 printf("Locus %d: %d species\r", locus+1, com.ns);
\r
798 fscanf(fBGH, "%d", &i);
\r
800 printf("ns = %d, expecting %d", i, com.ns);
\r
801 error2("ns not correct in ReadBlengthGH()");
\r
803 if(ReadTreeN(fBGH, &i, &j, 0, 1))
\r
804 error2("error when reading gene tree");
\r
805 if(i==0) error2("gene tree should have branch lengths for error checking");
\r
806 if((nb=tree.nbranch) != com.ns*2-3)
\r
807 error2("nb = ns * 2 -3 ?");
\r
810 FPN(F0); OutTreeN(F0,1,1); FPN(F0);
\r
812 for(i=0; i<tree.nnode; i++) {
\r
813 printf("\nnode %2d: branch %2d (%9.5f) dad %2d ", i+1, nodes[i].ibranch+1, nodes[i].branch, nodes[i].father+1);
\r
814 for(j=0; j<nodes[i].nson; j++) printf(" %2d", nodes[i].sons[j]+1);
\r
818 for(i=0; i<nb; i++) {
\r
819 fscanf(fBGH, "%lf", &data.blMLE[locus][i]);
\r
820 if(data.blMLE[locus][i] > 10) {
\r
821 printf("very large branch length %10.6f\n", data.blMLE[locus][i]);
\r
825 for(i=0; i<nb; i++)
\r
826 fscanf(fBGH, "%lf", &data.Gradient[locus][i]);
\r
828 fscanf(fBGH, "%s", line);
\r
829 if(!strstr(line,"Hessian")) error2("expecting Hessian in in.BV");
\r
830 for(i=0; i<nb; i++)
\r
831 for(j=0; j<nb; j++)
\r
832 if(fscanf(fBGH, "%lf", &data.Hessian[locus][i*nb+j]) != 1)
\r
833 error2("err when reading the hessian matrix in.BV");
\r
835 UseLocus(locus, 0, 0, 1);
\r
837 son1 = nodes[tree.root].sons[0];
\r
838 son2 = nodes[tree.root].sons[1];
\r
839 leftsingle = (nodes[son1].nson==0);
\r
840 if(leftsingle) { /* Root in unrooted tree is 2nd son of root in rooted tree */
\r
841 nodes[son1].ibranch = com.ns*2-3-1; /* last branch in unrooted tree, assuming binary tree */
\r
842 for(i=0; i<tree.nnode; i++) {
\r
843 if(i!=tree.root && i!=son1 && i!=son2)
\r
844 nodes[i].ibranch -= 2;
\r
847 else { /* Root in unrooted tree is 1st son of root in rooted tree */
\r
848 nodes[son1].ibranch = nodes[son2].ibranch - 1;
\r
849 for(i=0; i<tree.nnode; i++) {
\r
850 if(i!=tree.root && i!=son1 && i!=son2)
\r
851 nodes[i].ibranch --;
\r
854 nodes[tree.root].ibranch = nodes[son2].ibranch = -1;
\r
855 for(i=0; i<tree.nnode; i++)
\r
856 nodes[i].branch = (nodes[i].ibranch==-1 ? 0 : data.blMLE[locus][nodes[i].ibranch]);
\r
859 FPN(F0); FPN(F0); OutTreeN(F0,1,1);
\r
860 for(i=0; i<tree.nnode; i++) {
\r
861 printf("\nnode %2d: branch %2d (%9.5f) dad %2d ", i+1, nodes[i].ibranch+1, nodes[i].branch, nodes[i].father+1);
\r
862 for(j=0; j<nodes[i].nson; j++) printf(" %2d", nodes[i].sons[j]+1);
\r
867 if(data.transform==SQRT_B) /* sqrt transform on branch lengths */
\r
868 for(i=0; i<nb; i++) {
\r
869 dbu[i] = 2*sqrt(data.blMLE[locus][i]);
\r
872 else if(data.transform==LOG_B) /* logarithm transform on branch lengths */
\r
873 for(i=0; i<nb; i++) {
\r
874 e = (data.blMLE[locus][i] < bTlog ? elog : 0);
\r
875 dbu[i] = data.blMLE[locus][i] + e;
\r
878 else if(data.transform==ARCSIN_B) { /* arcsin transform on branch lengths */
\r
879 for(i=0; i<nb; i++) {
\r
881 if(data.blMLE[locus][i] < 1e-20)
\r
882 error2("blength should be > 0 for arcsin transform");
\r
884 u = 2*asin(sqrt(cJC - cJC*exp(- data.blMLE[locus][i]/cJC )));
\r
885 sin2u = sin(u/2); cos2u = cos(u/2);
\r
886 dbu[i] = sin2u*cos2u/(1-sin2u*sin2u/cJC);
\r
887 dbu2[i] = (cos2u*cos2u-sin2u*sin2u)/2/(1-sin2u*sin2u/cJC) + dbu[i]*dbu[i]/cJC;
\r
891 if(data.transform) { /* this part is common to all transforms */
\r
892 for(i=0; i<nb; i++) {
\r
894 data.Hessian[locus][j*nb+i] = data.Hessian[locus][i*nb+j] *= dbu[i]*dbu[j];
\r
895 data.Hessian[locus][i*nb+i] = data.Hessian[locus][i*nb+i] * dbu[i]*dbu[i]
\r
896 + data.Gradient[locus][i] * dbu2[i];
\r
898 for(i=0; i<nb; i++)
\r
899 data.Gradient[locus][i] *= dbu[i];
\r
902 if(data.transform==SQRT_B) /* sqrt transform on branch lengths */
\r
903 for(i=0; i<nb; i++)
\r
904 data.blMLE[locus][i] = sqrt(data.blMLE[locus][i]);
\r
905 else if(data.transform==LOG_B) { /* logarithm transform on branch lengths */
\r
908 else if(data.transform==ARCSIN_B) /* arcsin transform on branch lengths */
\r
909 for(i=0; i<nb; i++)
\r
910 data.blMLE[locus][i] = 2*asin(sqrt(cJC - cJC*exp(-data.blMLE[locus][i]/cJC )));
\r
914 /* free up memory for sequences */
\r
915 for(locus=0; locus<data.ngene; locus++) {
\r
916 free(data.fpatt[locus]);
\r
917 for(j=0; j<data.ns[locus]; j++)
\r
918 free(data.z[locus][j]);
\r
921 if(longbranches) printf("\a\n%d branch lengths are >10. Check that you are not using random sequences.", longbranches);
\r
926 int GenerateBlengthGH (char infile[])
\r
928 /* This generates the sequence alignment and tree files for calculating branch
\r
929 lengths, gradient, and hessian.
\r
930 This mimics Jeff Thorne's estbranches program.
\r
932 FILE *fseq, *ftree, *fctl;
\r
933 FILE *fBGH = gfopen(infile, "w");
\r
934 char tmpseqf[32], tmptreef[32], ctlf[32], outf[32], line[10000];
\r
938 for(locus=0; locus<data.ngene; locus++) {
\r
939 printf("\n\n*** Locus %d ***\n", locus+1);
\r
940 sprintf(tmpseqf, "tmp%04d.txt", locus+1);
\r
941 sprintf(tmptreef, "tmp%04d.trees", locus+1);
\r
942 sprintf(ctlf, "tmp%04d.ctl", locus+1);
\r
943 sprintf(outf, "tmp%04d.out", locus+1);
\r
944 fseq = gfopen(tmpseqf,"w");
\r
945 ftree = gfopen(tmptreef,"w");
\r
946 fctl = gfopen(ctlf,"w");
\r
948 UseLocus(locus, 0, 0, 1);
\r
949 com.cleandata = data.cleandata[locus];
\r
950 for(i=0; i<com.ns; i++)
\r
951 com.z[i] = data.z[locus][i];
\r
952 com.npatt = com.posG[1]=data.npatt[locus];
\r
954 com.fpatt = data.fpatt[locus];
\r
958 printPatterns(fseq);
\r
959 fprintf(ftree, " 1\n\n");
\r
960 OutTreeN(ftree, 1, 0); FPN(ftree);
\r
962 fprintf(fctl, "seqfile = %s\n", tmpseqf);
\r
963 fprintf(fctl, "treefile = %s\n", tmptreef);
\r
964 fprintf(fctl, "outfile = %s\nnoisy = 3\n", outf);
\r
966 fprintf(fctl, "seqtype = %d\n", com.seqtype);
\r
968 fprintf(fctl, "model = %d\n", com.model);
\r
969 if(com.seqtype==1) {
\r
970 fprintf(fctl, "icode = %d\n", com.icode);
\r
971 fprintf(fctl, "fix_kappa = 0\n kappa = 2\n");
\r
973 if(com.seqtype==2) {
\r
974 fprintf(fctl, "aaRatefile = %s\n", com.daafile);
\r
977 fprintf(fctl, "fix_alpha = 0\nalpha = 0.5\nncatG = %d\n", com.ncatG);
\r
978 fprintf(fctl, "Small_Diff = 0.1e-6\ngetSE = 2\n");
\r
979 fprintf(fctl, "method = %d\n", (com.alpha==0||com.ns>100 ? 1 : 0));
\r
981 fclose(fseq); fclose(ftree); fclose(fctl);
\r
983 if(com.seqtype) sprintf(line, "codeml %s", ctlf);
\r
984 else sprintf(line, "baseml %s", ctlf);
\r
985 printf("running %s\n", line);
\r
988 appendfile(fBGH, "rst2");
\r
995 int GetOptions (char *ctlf)
\r
997 int transform0=ARCSIN_B; /* default transform: SQRT_B, LOG_B, ARCSIN_B */
\r
998 int iopt, i, j, nopt=29, lline=4096;
\r
999 char line[4096], *pline, *peq, opt[32], *comment="*#";
\r
1000 char *optstr[] = {"seed", "seqfile","treefile", "outfile", "mcmcfile",
\r
1001 "seqtype", "aaRatefile", "icode", "noisy", "usedata", "ndata", "model", "clock",
\r
1002 "TipDate", "RootAge", "fossilerror", "alpha", "ncatG", "cleandata",
\r
1003 "BDparas", "kappa_gamma", "alpha_gamma",
\r
1004 "rgene_gamma", "sigma2_gamma", "print", "burnin", "sampfreq",
\r
1005 "nsample", "finetune"};
\r
1006 double t=1, *eps=mcmc.finetune;
\r
1007 FILE *fctl=gfopen (ctlf, "r");
\r
1009 data.transform = transform0;
\r
1011 if (noisy) printf ("\nReading options from %s..\n", ctlf);
\r
1013 if (fgets (line, lline, fctl) == NULL) break;
\r
1014 for (i=0,t=0,pline=line; i<lline&&line[i]; i++)
\r
1015 if (isalnum(line[i])) { t=1; break; }
\r
1016 else if (strchr(comment,line[i])) break;
\r
1017 if (t==0) continue;
\r
1018 if ((pline=strstr(line, "="))==NULL)
\r
1019 error2("option file.");
\r
1020 *pline='\0'; sscanf(line, "%s", opt); *pline='=';
\r
1021 sscanf(pline+1, "%lf", &t);
\r
1023 for (iopt=0; iopt<nopt; iopt++) {
\r
1024 if (strncmp(opt, optstr[iopt], 8)==0) {
\r
1026 printf ("\n%3d %15s | %-20s %6.2f", iopt+1,optstr[iopt],opt,t);
\r
1028 case ( 0): SetSeed((int)t, 1); break;
\r
1029 case ( 1): sscanf(pline+1, "%s", com.seqf); break;
\r
1030 case ( 2): sscanf(pline+1, "%s", com.treef); break;
\r
1031 case ( 3): sscanf(pline+1, "%s", com.outf); break;
\r
1032 case ( 4): sscanf(pline+1, "%s", com.mcmcf); break;
\r
1033 case ( 5): com.seqtype=(int)t; break;
\r
1034 case ( 6): sscanf(pline+2,"%s", com.daafile); break;
\r
1035 case ( 7): com.icode=(int)t; break;
\r
1036 case ( 8): noisy=(int)t; break;
\r
1038 j=sscanf(pline+1, "%d %s%d", &mcmc.usedata, com.inBVf, &data.transform);
\r
1039 if(mcmc.usedata==2)
\r
1040 if(strchr(com.inBVf, '*')) { strcpy(com.inBVf, "in.BV"); data.transform=transform0; }
\r
1041 else if(j==2) data.transform=transform0;
\r
1043 case (10): com.ndata=(int)t; break;
\r
1044 case (11): com.model=(int)t; break;
\r
1045 case (12): com.clock=(int)t; break;
\r
1047 sscanf(pline+2, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
\r
1048 if(com.TipDate && com.TipDate_TimeUnit==0) error2("should set com.TipDate_TimeUnit");
\r
1049 data.transform = SQRT_B; /* SQRT_B, LOG_B, ARCSIN_B */
\r
1052 sptree.RootAge[2] = sptree.RootAge[3] = 0.025; /* default tail probs */
\r
1053 if((strchr(line, '>') || strchr(line, '<')) && (strstr(line, "U(") || strstr(line, "B(")))
\r
1054 error2("don't mix < U B on the RootAge line");
\r
1056 if((pline=strchr(line, '>')))
\r
1057 sscanf(pline+1, "%lf", &sptree.RootAge[0]);
\r
1058 if((pline=strchr(line,'<'))) { /* RootAge[0]=0 */
\r
1059 sscanf(pline+1, "%lf", &sptree.RootAge[1]);
\r
1061 if((pline=strstr(line, "U(")))
\r
1062 sscanf(pline+2, "%lf,%lf", &sptree.RootAge[1], &sptree.RootAge[2]);
\r
1063 else if((pline=strstr(line, "B(")))
\r
1064 sscanf(pline+2, "%lf,%lf,%lf,%lf", &sptree.RootAge[0], &sptree.RootAge[1], &sptree.RootAge[2], &sptree.RootAge[3]);
\r
1067 data.pfossilerror[0] = 0.0;
\r
1068 data.pfossilerror[2] = 1; /* default: minimum 2 good fossils */
\r
1069 sscanf(pline+1, "%lf%lf%lf", data.pfossilerror, data.pfossilerror+1, data.pfossilerror+2);
\r
1071 case (16): com.alpha=t; break;
\r
1072 case (17): com.ncatG=(int)t; break;
\r
1073 case (18): com.cleandata=(int)t; break;
\r
1075 sscanf(pline+1,"%lf%lf%lf%lf", &data.BDS[0],&data.BDS[1],&data.BDS[2],&data.BDS[3]);
\r
1078 sscanf(pline+1,"%lf%lf", data.kappagamma, data.kappagamma+1); break;
\r
1080 sscanf(pline+1,"%lf%lf", data.alphagamma, data.alphagamma+1); break;
\r
1082 sscanf(pline+1,"%lf%lf%lf", data.rgenegD, data.rgenegD+1, data.rgenegD+2);
\r
1083 if(data.rgenegD[2]<=0) data.rgenegD[2]=1;
\r
1086 sscanf(pline+1,"%lf%lf%lf", data.sigma2gD, data.sigma2gD+1, data.sigma2gD+2);
\r
1087 if(data.sigma2gD[2]<=0) data.sigma2gD[2]=1;
\r
1089 case (24): mcmc.print=(int)t; break;
\r
1090 case (25): mcmc.burnin=(int)t; break;
\r
1091 case (26): mcmc.sampfreq=(int)t; break;
\r
1092 case (27): mcmc.nsample=(int)t; break;
\r
1094 sscanf(pline+1,"%d:%lf%lf%lf%lf%lf%lf", &mcmc.resetFinetune, eps,eps+1,eps+2,eps+3,eps+4,eps+5);
\r
1101 { printf ("\noption %s in %s\n", opt, ctlf); exit (-1); }
\r
1106 if (noisy) error2("\nno ctl file..");
\r
1108 if(com.ndata>NGENE) error2("raise NGENE?");
\r
1109 else if(com.ndata<=0) com.ndata=1;
\r
1111 if(com.seqtype==2)
\r
1114 if(com.alpha==0) { com.fix_alpha=1; com.nalpha=0; }
\r
1115 if(com.clock<1 || com.clock>3) error2("clock should be 1, 2, 3?");
\r
1120 double lnPDFInfinitesitesClock (double t1, double FixedDs[])
\r
1122 /* This calculates the ln of the joint pdf, which is proportional to the
\r
1123 posterior for given root age, assuming infinite sites. Fixed distances are
\r
1124 in FixedDs[]: d11, d12, ..., d(1,s-1), d21, d31, ..., d_g1.
\r
1125 Note that the posterior is one dimensional, and the variable used is root age.
\r
1127 int s=sptree.nspecies, g=data.ngene, i,j;
\r
1128 double lnp, summu=0, prodmu=1, *gD=data.rgenegD;
\r
1130 sptree.nodes[sptree.root].age=t1;
\r
1131 for(j=s; j<sptree.nnode; j++)
\r
1132 if(j!=sptree.root)
\r
1133 sptree.nodes[j].age = t1*FixedDs[j-s]/FixedDs[0];
\r
1135 lnp = lnpriorTimes();
\r
1137 data.rgene[0] = FixedDs[0]/t1;
\r
1138 for(i=1; i<g; i++) {
\r
1139 data.rgene[i] = FixedDs[s-1+i-1]/t1;
\r
1140 summu += data.rgene[j];
\r
1141 prodmu *= data.rgene[j];
\r
1143 lnp += (gD[0]-gD[2]*g)*log(summu) - gD[1]/g*summu + (gD[2]-1)*log(prodmu); /* f(mu_i) */
\r
1144 lnp += (2-s)*log(FixedDs[0]/t1) - g*log(t1); /* Jacobi */
\r
1149 double InfinitesitesClock (double *FixedDs)
\r
1151 /* This runs MCMC to calculate the posterior density of the root age
\r
1152 when there are infinite sites at each locus. The clock is assumed, so that
\r
1153 the posterior is one-dimensional.
\r
1155 int i,j, ir, nround=0, nsaved=0;
\r
1156 int s=sptree.nspecies, g=data.ngene;
\r
1157 double t, tnew, naccept=0;
\r
1158 double e=mcmc.finetune[0], lnp, lnpnew, lnacceptance, c, *x, Pjump=0;
\r
1159 double tmean, t025,t975, tL, tU;
\r
1162 matout2(F0, FixedDs, 1, s-1+g-1, 8, 4);
\r
1163 printf("\nRunning MCMC to get %d samples for t0 (root age)\n", mcmc.nsample);
\r
1164 t=sptree.nodes[sptree.root].age;
\r
1165 lnp = lnPDFInfinitesitesClock(t, FixedDs);
\r
1166 x=(double*)malloc(max2(mcmc.nsample,(s+g)*3)*sizeof(double));
\r
1167 if(x==NULL) error2("oom x");
\r
1169 for(ir=-mcmc.burnin,tmean=0; ir<mcmc.nsample*mcmc.sampfreq; ir++) {
\r
1170 if(ir==0 || (ir<0 && ir%(mcmc.burnin/2)==0)) {
\r
1171 nround=0; naccept=0; tmean=0;
\r
1172 ResetFinetuneSteps(NULL, &Pjump, &e, 1);
\r
1174 lnacceptance = e*rndSymmetrical();
\r
1175 c = exp(lnacceptance);
\r
1177 lnpnew = lnPDFInfinitesitesClock(tnew, FixedDs);
\r
1178 lnacceptance += lnpnew-lnp;
\r
1180 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
1181 t=tnew; lnp=lnpnew; naccept++;
\r
1184 tmean = (tmean*(nround-1)+t)/nround;
\r
1186 if(ir>=0 && (ir+1)%mcmc.sampfreq==0)
\r
1189 Pjump = naccept/nround;
\r
1190 if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/10000)==0)
\r
1191 printf("\r%3.0f%% %7.2f mean t0 = %9.6f", (ir+1.)/(mcmc.nsample*mcmc.sampfreq)*100, Pjump,tmean);
\r
1192 if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/10)==0)
\r
1193 printf(" %s\n", printtime(timestr));
\r
1196 qsort(x, (size_t)mcmc.nsample, sizeof(double), comparedouble);
\r
1197 t025 = x[(int)(mcmc.nsample*.025+.5)];
\r
1198 t975 = x[(int)(mcmc.nsample*.975+.5)];
\r
1200 /* Below x[] is used to collect the posterior means and 95% CIs */
\r
1201 for(i=0; i<3; i++) {
\r
1202 t = (i==0 ? tmean : (i==1 ? t025 : t975));
\r
1203 lnPDFInfinitesitesClock(t, FixedDs); /* calculates t and mu */
\r
1204 for(j=s; j<sptree.nnode; j++)
\r
1205 x[i*(s+g)+(j-s)] = sptree.nodes[j].age;
\r
1206 for(j=0; j<g; j++)
\r
1207 x[i*(s+g)+s-1+j] = data.rgene[j];
\r
1209 printf("\nmean (95%% CI) CI-width for times\n\n");
\r
1210 for(j=s; j<sptree.nnode; j++) {
\r
1211 tL = x[(s+g)+j-s];
\r
1212 tU = x[2*(s+g)+j-s];
\r
1213 printf("Node %2d: %9.6f (%9.6f, %9.6f) %9.6f\n", j+1, x[j-s], tL, tU, tU-tL);
\r
1215 printf("\nmean & 95%% CI for rates\n\n");
\r
1216 for(j=0; j<g; j++)
\r
1217 printf("gene %2d: %9.6f (%9.6f, %9.6f)\n", j+1,x[s-1+j], x[2*(s+g)+s-1+j], x[(s+g)+s-1+j]);
\r
1219 printf("\nNote: the posterior has only one dimension.\n");
\r
1225 double lnPDFInfinitesitesClock23 (double FixedDs[])
\r
1227 /* This calculates the log joint pdf, which is proportional to the posterior
\r
1228 when there are infinite sites at each locus. The data are fixed branch lengths,
\r
1229 stored in FixedDs, locus by locus.
\r
1230 The variables in the posterior are node ages, rate r0 for root son0, and mu & sigma2.
\r
1231 This PDF includes f(t1,...,t_{s-1})*f(r_ij), but not f(mu_i) or f(sigma2_i) for loci.
\r
1232 The latter are used to calculate ratios in the MCMC algorithm.
\r
1234 int s=sptree.nspecies, g=data.ngene, locus,j;
\r
1235 int root=sptree.root, *sons=sptree.nodes[root].sons;
\r
1236 double lnJ, lnp, b, r0; /* r0 is rate for root son0, fixed. */
\r
1237 double t, t0=sptree.nodes[root].age - sptree.nodes[sons[0]].age;
\r
1238 double t1=sptree.nodes[root].age - sptree.nodes[sons[1]].age;
\r
1239 double summu=0, prodmu=1, *gD=data.rgenegD, *gDs=data.sigma2gD;
\r
1241 /* compute branch rates using times and branch lengths */
\r
1242 for(locus=0; locus<g; locus++) {
\r
1243 for(j=0; j<sptree.nnode; j++) {
\r
1244 if(j==root || j==sons[0]) continue; /* r0 is in the posterior */
\r
1245 t = sptree.nodes[nodes[j].father].age - sptree.nodes[j].age;
\r
1250 b=FixedDs[locus*sptree.nnode+sons[0]];
\r
1251 r0=sptree.nodes[sons[0]].rates[locus];
\r
1252 sptree.nodes[j].rates[locus] = (b-r0*t0)/t1;
\r
1253 if(r0<=0 || t1<=0 || b-r0*t0<=0) {
\r
1254 printf("\nr0 = %.6f t1 = %.6f b-t0*t0 = %.6f", r0, t1, b-r0*t0);
\r
1255 error2("r<=0 || t1<=0 || b-r0*t0<=0...");
\r
1259 sptree.nodes[j].rates[locus] = FixedDs[locus*sptree.nnode+j]/t;
\r
1263 printf("\n (age tlength) rates & branchlengths\n");
\r
1264 for(j=0; j<sptree.nnode; j++,FPN(F0)) {
\r
1265 t = (j==root? -1 : sptree.nodes[nodes[j].father].age - sptree.nodes[j].age);
\r
1266 printf("%2d (%7.4f%9.4f) ", j+1,sptree.nodes[j].age, t);
\r
1267 for(locus=0; locus<g; locus++) printf(" %9.4f", sptree.nodes[j].rates[locus]);
\r
1269 for(locus=0; locus<g; locus++) printf(" %9.4f", FixedDs[locus*sptree.nnode+j]);
\r
1274 lnp = lnpriorTimes() + lnpriorRates();
\r
1275 for(j=0,lnJ=-log(t1); j<sptree.nnode; j++)
\r
1276 if(j!=root && j!=sons[0] && j!=sons[1])
\r
1277 lnJ -= log(sptree.nodes[nodes[j].father].age - sptree.nodes[j].age);
\r
1284 double Infinitesites(FILE *fout)
\r
1286 /* This reads the fixed branch lengths and runs MCMC to calculate the posterior
\r
1287 of times and rates when there are infinite sites at each locus.
\r
1288 This is implemented for the global clock model (clock = 1) and the
\r
1289 independent-rates model (clock = 2). I think this works for clock3 as well.
\r
1290 mcmc.finetune[] is used to propose changes: 0: t, 1:mu, 2:r; 3:mixing, 4:sigma2.
\r
1292 For clock=2, the MCMC samples the following variables:
\r
1293 s-1 times, rate r0 for left root branch at each locus,
\r
1294 mu at each locus & sigma2 at each locus.
\r
1296 int root=sptree.root, *sons=sptree.nodes[root].sons;
\r
1297 int lline=10000, locus, nround, ir, i,j,k, ip, s=sptree.nspecies, g=data.ngene;
\r
1298 int nxpr[2]={8,3};
\r
1301 double *e=mcmc.finetune, y, ynew, yb[2], sumold, sumnew, *gD, naccept[5]={0}, Pjump[5]={0};
\r
1302 double lnL, lnLnew, lnacceptance, c, lnc;
\r
1303 double *x,*mx, *FixedDs,*b, maxt0,tson[2];
\r
1304 char *FidedDf[2]={"FixedDsClock1.txt", "FixedDsClock23.txt"};
\r
1305 FILE *fdin=gfopen(FidedDf[com.clock>1],"r"), *fmcmc=gfopen(com.mcmcf,"w");
\r
1307 com.model=0; com.alpha=0;
\r
1313 printf("\nInfiniteSites, reading fixed distance data from %s (s = %d g = %d):\n\n", FidedDf[com.clock>1], s,g);
\r
1314 com.np = s-1 + g + g + g; /* times, mu, sigma2, rates */
\r
1315 FixedDs = (double*)malloc((g*sptree.nnode+com.np*2)*sizeof(double));
\r
1316 if(FixedDs==NULL) error2("oom");
\r
1317 x = FixedDs+g*sptree.nnode;
\r
1319 fscanf(fdin, "%d", &i);
\r
1320 if(i!=s) error2("wrong number of species in FixedDs.txt");
\r
1322 if(data.pfossilerror[0]) {
\r
1323 puts("model of fossil errors for infinite data not tested yet.");
\r
1325 SetupPriorTimesFossilErrors();
\r
1328 if(com.clock==1) { /* global clock: read FixedDs[] and close file. */
\r
1329 for(i=0; i<s-1+g-1; i++)
\r
1330 fscanf(fdin, "%lf", &FixedDs[i]);
\r
1332 InfinitesitesClock(FixedDs);
\r
1336 /* print header line in the mcmc.txt sample file */
\r
1337 fprintf(fmcmc, "Gen");
\r
1338 for(i=s; i<sptree.nnode; i++) fprintf(fmcmc, "\tt_n%d", i+1);
\r
1339 for(j=0; j<g; j++) fprintf(fmcmc, "\tmu_L%d", j+1);
\r
1340 for(j=0; j<g; j++) fprintf(fmcmc, "\tsigma2_L%d", j+1);
\r
1341 for(j=0; j<g; j++) fprintf(fmcmc, "\tr_left_L%d", j+1);
\r
1342 fprintf(fmcmc, "\n");
\r
1344 for(locus=0,b=FixedDs,nodes=nodes_t; locus<g; locus++,b+=sptree.nnode) {
\r
1345 ReadTreeN(fdin,&i,&i,1,1);
\r
1346 OutTreeN(F0, 1, 1); FPN(F0);
\r
1347 if(tree.nnode!=sptree.nnode)
\r
1348 error2("use species tree for each locus!");
\r
1350 b[sons[0]] = nodes[sons[0]].branch+nodes[sons[1]].branch;
\r
1352 for(j=0; j<sptree.nnode; j++) {
\r
1353 if(j!=root && j!=sons[0] && j!=sons[1])
\r
1354 b[j] = nodes[j].branch;
\r
1359 printf("\nFixed distances at %d loci\n", g);
\r
1360 for(j=0; j<sptree.nnode; j++,FPN(F0)) {
\r
1361 printf("node %3d ", j+1);
\r
1362 for(locus=0; locus<g; locus++)
\r
1363 printf(" %9.6f", FixedDs[locus*sptree.nnode+j]);
\r
1366 for(i=0; i<g; i++) { /* GetInitial() is unsafe. Reset r0 so that r0*t0<b0. */
\r
1367 y = FixedDs[i*sptree.nnode+sons[0]]/(sptree.nodes[root].age-sptree.nodes[sons[0]].age);
\r
1368 sptree.nodes[sons[0]].rates[i] = y*rndu();
\r
1371 for(i=0; i<com.np; i++) mx[i]=0;
\r
1372 for(i=0,k=0; i<s-1; i++) x[k++]=sptree.nodes[s+i].age;
\r
1373 for(i=0; i<g; i++) x[k++]=data.rgene[i];
\r
1374 for(i=0; i<g; i++) x[k++]=data.sigma2[i];
\r
1375 for(i=0; i<g; i++) x[k++]=sptree.nodes[sons[0]].rates[i];
\r
1377 lnL = lnPDFInfinitesitesClock23(FixedDs);
\r
1379 printf("\nStarting MCMC (np=%d) lnp = %9.3f\nInitials: ", com.np, lnL);
\r
1380 for(i=0; i<com.np; i++) printf(" %5.3f", x[i]);
\r
1381 printf("\n\nparas: %d times, %d mu, %d sigma2, %d rates r0 (left daughter of root)", s-1, g, g, g);
\r
1382 printf("\nUsing finetune parameters from the control file\n");
\r
1384 /* MCMC proposals: t (0), mu (1), sigma2 (4), r0 (2), mixing (3) */
\r
1385 for(ir=-mcmc.burnin,nround=0; ir<mcmc.sampfreq*mcmc.nsample; ir++) {
\r
1386 if(ir==0 || (ir<0 && ir%(mcmc.burnin/4)==0)) {
\r
1387 nround=0; zero(naccept,5); zero(mx,com.np);
\r
1388 ResetFinetuneSteps(NULL, Pjump, mcmc.finetune, 5);
\r
1390 for(ip=0; ip<com.np; ip++) {
\r
1392 if(ip<s-1) { /* times */
\r
1393 y = sptree.nodes[s+ip].age;
\r
1394 for(i=0; i<2; i++) tson[i] = sptree.nodes[sptree.nodes[s+ip].sons[i]].age;
\r
1395 yb[0] = max2(tson[0], tson[1]);
\r
1396 yb[1] = (s+ip==root ? OldAge : sptree.nodes[sptree.nodes[s+ip].father].age);
\r
1397 if(s+ip == root) {
\r
1398 for(i=0; i<g; i++) {
\r
1399 maxt0 = FixedDs[i*sptree.nnode+sons[0]]/sptree.nodes[sons[0]].rates[i];
\r
1400 yb[1] = min2(yb[1], tson[0]+maxt0);
\r
1403 else if(s+ip==sons[0]) {
\r
1404 for(i=0; i<g; i++) {
\r
1405 maxt0 = FixedDs[i*sptree.nnode+sons[0]]/sptree.nodes[sons[0]].rates[i];
\r
1406 yb[0] = max2(yb[0], sptree.nodes[root].age-maxt0);
\r
1409 ynew = y + e[0]*rndSymmetrical();
\r
1410 ynew = sptree.nodes[s+ip].age = reflect(ynew,yb[0],yb[1]);
\r
1412 else if(ip-(s-1)<g) { /* mu for loci */
\r
1413 gD = data.rgenegD;
\r
1414 for(j=0,sumold=0; j<g; j++) sumold += data.rgene[j];
\r
1415 lnacceptance = lnc = e[1]*rndSymmetrical();
\r
1417 y = data.rgene[ip-(s-1)];
\r
1418 ynew = data.rgene[ip-(s-1)] *= c;
\r
1420 sumnew = sumold + ynew - y;
\r
1421 lnacceptance += (gD[0]-gD[2]*g)*log(sumnew/sumold) - gD[1]/g*(ynew-y) + (gD[2]-1)*lnc;
\r
1423 else if (ip-(s-1+g)<g) { /* sigma2 for loci */
\r
1424 gD = data.sigma2gD;
\r
1425 for(j=0,sumold=0; j<g; j++) sumold += data.sigma2[j];
\r
1426 lnacceptance = lnc = e[4]*rndSymmetrical();
\r
1428 y = data.sigma2[ip-(s-1+g)];
\r
1429 ynew = data.sigma2[ip-(s-1+g)] *= c;
\r
1430 sumnew = sumold + ynew - y;
\r
1431 lnacceptance += (gD[0]-gD[2]*g)*log(sumnew/sumold) - gD[1]/g*(ynew-y) + (gD[2]-1)*lnc;
\r
1433 else { /* rate r0 for root son0 for loci (r0*t0<b0) */
\r
1434 y = sptree.nodes[root].age-sptree.nodes[sons[0]].age;
\r
1436 printf("age error");
\r
1438 yb[1] = FixedDs[(ip-(s-1+g*2))*sptree.nnode+sons[0]]/y;
\r
1439 y = sptree.nodes[sons[0]].rates[ip-(s-1+g*2)];
\r
1440 ynew = y + e[2]*rndSymmetrical();
\r
1441 ynew = sptree.nodes[sons[0]].rates[ip-(s-1+g*2)] = reflect(ynew,yb[0],yb[1]);
\r
1445 lnLnew = lnPDFInfinitesitesClock23(FixedDs);
\r
1446 lnacceptance += lnLnew-lnL;
\r
1448 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
1449 x[ip]=ynew; lnL=lnLnew;
\r
1450 if(ip<s-1) naccept[0] += 1.0/(s-1); /* t */
\r
1451 else if(ip<s-1+g) naccept[1] += 1.0/g; /* mu */
\r
1452 else if(ip<s-1+g*2) naccept[4] += 1.0/g; /* sigma2 */
\r
1453 else naccept[2] += 1.0/g; /* r0 for son0 */
\r
1456 if(ip<s-1) /* t */
\r
1457 sptree.nodes[s+ip].age = y;
\r
1458 else if(ip-(s-1)<g) /* mu */
\r
1459 data.rgene[ip-(s-1)] = y;
\r
1460 else if(ip-(s-1+g)<g) /* sigma2 */
\r
1461 data.sigma2[ip-(s-1+g)] = y;
\r
1462 else /* r0 for son0 */
\r
1463 sptree.nodes[sons[0]].rates[ip-(s-1+g*2)] = y;
\r
1465 } /* for(ip, com.np) */
\r
1467 if(MixingStep) { /* this multiplies times by c and divides mu and r by c. */
\r
1468 lnc = e[3]*rndSymmetrical();
\r
1470 lnacceptance = (s - 1 - g - g)*(lnc);
\r
1472 gD = data.rgenegD;
\r
1473 for(j=0,sumold=0; j<g; j++) sumold += data.rgene[j];
\r
1474 sumnew = sumold/c;
\r
1475 lnacceptance += (gD[0]-gD[2]*g)*log(sumnew/sumold) - gD[1]/g*(sumnew-sumold) - (gD[2]-1)*g*lnc;
\r
1477 for(j=s; j<sptree.nnode; j++) sptree.nodes[j].age *= c;
\r
1478 for(i=0; i<g; i++) data.rgene[i] /= c;
\r
1479 for(i=0; i<g; i++) sptree.nodes[sons[0]].rates[i] /= c;
\r
1480 lnLnew = lnPDFInfinitesitesClock23(FixedDs);
\r
1481 lnacceptance += lnLnew-lnL;
\r
1482 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
1487 for(j=s; j<sptree.nnode; j++) sptree.nodes[j].age/=c;
\r
1488 for(i=0; i<g; i++) data.rgene[i] *= c;
\r
1489 for(i=0; i<g; i++) sptree.nodes[sons[0]].rates[i] *= c;
\r
1494 for(j=s,k=0; j<sptree.nnode; j++) x[k++]=sptree.nodes[j].age;
\r
1495 for(i=0; i<g; i++) x[k++]=data.rgene[i];
\r
1496 for(i=0; i<g; i++) x[k++]=data.sigma2[i];
\r
1497 for(i=0; i<g; i++) x[k++]=sptree.nodes[sons[0]].rates[i];
\r
1498 for(i=0; i<com.np; i++) mx[i] = (mx[i]*(nround-1)+x[i])/nround;
\r
1499 for(i=0; i<5; i++) Pjump[i] = naccept[i]/nround;
\r
1501 k = mcmc.sampfreq*mcmc.nsample;
\r
1502 if(noisy && (k<=10000 || (ir+1)%(k/2000)==0)) {
\r
1503 printf("\r%3.0f%%", (ir+1.)/(mcmc.nsample*mcmc.sampfreq)*100.);
\r
1504 for(j=0; j<5; j++) printf(" %4.2f", Pjump[j]); printf(" ");
\r
1505 if(com.np<nxpr[0]+nxpr[1]) { nxpr[0]=com.np; nxpr[1]=0; }
\r
1506 for(j=0; j<nxpr[0]; j++) printf(" %5.3f", mx[j]);
\r
1507 if(com.np>nxpr[0]+nxpr[1] && nxpr[1]) printf(" -");
\r
1508 for(j=0; j<nxpr[1]; j++) printf(" %5.3f", mx[com.np-nxpr[1]+j]);
\r
1509 printf(" %5.2f", lnL);
\r
1511 if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0)
\r
1512 printf(" %s\n", printtime(timestr));
\r
1513 if(ir>0 && (ir+1)%mcmc.sampfreq==0) {
\r
1514 fprintf(fmcmc, "%d", ir+1);
\r
1515 for(i=0; i<com.np; i++)
\r
1516 fprintf(fmcmc, "\t%.5f", x[i]); FPN(fmcmc);
\r
1521 DescriptiveStatisticsSimpleMCMCTREE(fout, com.mcmcf, 1);
\r
1529 int DownSptreeSetTime (int inode)
\r
1531 /* This goes down the species tree, from the root to the tips, to specify the
\r
1532 initial node ages. If the age of inode is not set already, it will
\r
1534 This is called by GetInitials().
\r
1536 int j,ison, correctionnews=0;
\r
1538 for (j=0; j<sptree.nodes[inode].nson; j++) {
\r
1539 ison = sptree.nodes[inode].sons[j];
\r
1540 if(sptree.nodes[ison].nson) { /* ison is not tip */
\r
1541 if(sptree.nodes[ison].age == -1) {
\r
1542 sptree.nodes[ison].age = sptree.nodes[inode].age * (.6+.4*rndu());
\r
1543 correctionnews ++;
\r
1545 else if (sptree.nodes[ison].age > sptree.nodes[inode].age) {
\r
1546 sptree.nodes[ison].age = sptree.nodes[inode].age * (0.95+0.5*rndu());
\r
1547 correctionnews ++;
\r
1549 correctionnews += DownSptreeSetTime(ison);
\r
1552 return(correctionnews);
\r
1555 int GetInitials ()
\r
1557 /* This sets the initial values for starting the MCMC, and returns np, the
\r
1558 number of parameters in the MCMC, to be collected in collectx().
\r
1559 The routine guarantees that each node age is younger than its ancestor's age.
\r
1560 It does not check the consistency of divergence times against the fossil
\r
1561 constraints. As the model assumes soft bounds, any divergence times are
\r
1562 possible, even though this means that the chain might start from a poor
\r
1565 int np=0, i,j,k, jj, dad, nchanges, g=data.ngene;
\r
1566 double maxlower=0; /* rough age for root */
\r
1567 double *p=sptree.nodes[sptree.root].pfossil, smallt=(p[1]-p[0])/(40*sqrt(sptree.nspecies));
\r
1568 double a_r=data.rgenegD[0], b_r=data.rgenegD[1], a,b, smallr=1e-3, d;
\r
1569 double AgeLow[NS]={0}, tz;
\r
1571 com.rgene[0]=-1; /* com.rgene[] is not used. -1 to force crash */
\r
1572 puts("\ngetting initial values to start MCMC.");
\r
1574 if(com.TipDate) { /* TipDate model */
\r
1575 /* set up initial node ages by looking at the minimum ages at each node */
\r
1576 if(sptree.nodes[sptree.root].fossil == BOUND_F)
\r
1577 sptree.nodes[sptree.root].age = p[0] + (p[1]-p[0])*rndu();
\r
1579 error2("\nthere should be something on the root age for the TipDate model\n");
\r
1581 for(j=0; j<sptree.nspecies; j++) {
\r
1582 tz = sptree.nodes[j].age;
\r
1583 for(k=sptree.nodes[j].father; k!=-1; k=sptree.nodes[k].father)
\r
1584 if(tz < AgeLow[k-sptree.nspecies])
\r
1587 AgeLow[k-sptree.nspecies] = tz;
\r
1589 for(j=sptree.nspecies+1; j<sptree.nnode; j++) {
\r
1590 jj = j-sptree.nspecies;
\r
1591 sptree.nodes[j].age = AgeLow[jj] + (sptree.nodes[sptree.nodes[j].father].age - AgeLow[jj])*(0.5+0.5*rndu());
\r
1594 for(i=0; i<1000; i++) {
\r
1595 for(j=0,nchanges=0; j<sptree.nspecies; j++) {
\r
1596 for(k=j; k!=sptree.root; k=dad) {
\r
1597 dad = sptree.nodes[k].father;
\r
1598 if(sptree.nodes[dad].age <= sptree.nodes[k].age) {
\r
1599 sptree.nodes[dad].age = max2(sptree.nodes[k].age, smallt) * (1 + smallt*0.1*rndu());
\r
1604 if(nchanges) puts("nchanges should be 0 here??");
\r
1605 if(!nchanges) break;
\r
1607 if(sptree.nodes[sptree.root].fossil == BOUND_F) {
\r
1608 if(sptree.nodes[sptree.root].age<p[0]) {
\r
1609 sptree.nodes[sptree.root].age = p[0] + (p[1]-p[0])*rndu();
\r
1614 else { /* not TipDate model */
\r
1615 /* set up initial node ages by looking at the fossil info */
\r
1616 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
1617 sptree.nodes[j].age = -1;
\r
1618 if(sptree.nodes[j].fossil == 0) continue;
\r
1619 p = sptree.nodes[j].pfossil;
\r
1621 if(sptree.nodes[j].fossil == LOWER_F) {
\r
1622 sptree.nodes[j].age = p[0] * (1.1+0.2*rndu());
\r
1623 maxlower = max2(maxlower, p[0]);
\r
1625 else if(sptree.nodes[j].fossil == UPPER_F)
\r
1626 sptree.nodes[j].age = p[1] * (0.6+0.4*rndu());
\r
1627 else if(sptree.nodes[j].fossil == BOUND_F) {
\r
1628 sptree.nodes[j].age = p[0] + (p[1] - p[0])*(0.2+rndu()*1.6);
\r
1629 maxlower = max2(maxlower, p[0]);
\r
1631 else if(sptree.nodes[j].fossil == GAMMA_F) {
\r
1632 sptree.nodes[j].age = p[0]/p[1]*(0.7+rndu()*0.6);
\r
1633 maxlower = max2(maxlower, QuantileGamma(0.025, p[0], p[1]));
\r
1635 else if(sptree.nodes[j].fossil == SKEWN_F || sptree.nodes[j].fossil == SKEWT_F) {
\r
1636 d = p[2]/sqrt(1+p[2]*p[2]);
\r
1637 a = p[0] + p[1]*d*sqrt(2/Pi);
\r
1638 sptree.nodes[j].age = a * (0.6+0.4*rndu());
\r
1639 maxlower = max2(maxlower, a*(1.2+2*rndu()));
\r
1641 else if(sptree.nodes[j].fossil == S2N_F) {
\r
1642 d = p[3]/sqrt(1+p[3]*p[3]);
\r
1643 a = (p[1] + p[2]*d*sqrt(2/Pi)); /* mean of SN 1 */
\r
1644 d = p[6]/sqrt(1+p[6]*p[6]);
\r
1645 b = (p[4] + p[5]*d*sqrt(2/Pi)); /* mean of SN 2 */
\r
1646 sptree.nodes[j].age = (p[0]*a + b) * (0.6+0.4*rndu());
\r
1647 maxlower = max2(maxlower, (p[0]*a + b)*(1.2+2*rndu()));
\r
1651 if(sptree.nodes[sptree.root].age == -1) {
\r
1652 maxlower = max2(maxlower, sptree.RootAge[0]);
\r
1653 sptree.nodes[sptree.root].age = min2(maxlower*1.5, sptree.RootAge[1]) * (.7+.6*rndu());
\r
1655 for(i=0; i<1000; i++) {
\r
1656 if(DownSptreeSetTime(sptree.root) == 0)
\r
1662 error2("Starting times are unfeasible!\nTry again.");
\r
1664 /* initial mu (mean rates) for genes */
\r
1665 np = sptree.nspecies-1 + g;
\r
1666 for(i=0; i<g; i++)
\r
1667 data.rgene[i] = smallr + rndgamma(a_r)/b_r; /* mu */
\r
1669 if(com.clock>1) { /* sigma2, rates for nodes or branches */
\r
1671 if(mcmc.print>=2) np += g*(sptree.nnode-1);
\r
1673 /* sigma2 in lnrates among loci */
\r
1674 for(i=0; i<g; i++)
\r
1675 data.sigma2[i] = rndgamma(data.sigma2gD[0])/data.sigma2gD[1] + smallr;
\r
1676 /* rates at nodes */
\r
1677 for(j=0; j<sptree.nnode; j++) {
\r
1678 if(j==sptree.root) {
\r
1679 for(i=0; i<g; i++) sptree.nodes[j].rates[i] = -99;
\r
1682 for(i=0; i<g; i++) {
\r
1683 sptree.nodes[j].rates[i] = smallr + rndgamma(a_r)/b_r;
\r
1689 /* set up substitution parameters */
\r
1690 if(mcmc.usedata==1) {
\r
1691 for(i=0; i<g; i++)
\r
1692 if(data.datatype[i]==BASE && com.model>=K80 && !com.fix_kappa) {
\r
1693 data.kappa[i] = rndgamma(data.kappagamma[0])/data.kappagamma[1]+0.5;
\r
1696 for(i=0; i<g; i++)
\r
1697 if(data.datatype[i]==BASE && !com.fix_alpha) {
\r
1698 data.alpha[i] = rndgamma(data.alphagamma[0])/data.alphagamma[1]+0.1;
\r
1703 if(data.pfossilerror[0]) {
\r
1704 a = data.pfossilerror[0];
\r
1705 b = data.pfossilerror[1];
\r
1706 data.Pfossilerr = a/(a+b)*(0.4+0.6*rndu());
\r
1713 int collectx (FILE* fout, double x[])
\r
1715 /* this collects parameters into x[] for printing and summarizing.
\r
1716 It returns the number of parameters.
\r
1718 clock=1: times, rates for genes, kappa, alpha
\r
1719 clock=0: times, rates or rates by node by gene, sigma2, rho_ij, kappa, alpha
\r
1721 int i,j, np=0, g=data.ngene;
\r
1722 static int firsttime=1;
\r
1724 if(firsttime && fout) fprintf(fout, "Gen");
\r
1725 for(i=sptree.nspecies; i<sptree.nspecies*2-1; i++) {
\r
1726 if(firsttime && fout) fprintf(fout, "\tt_n%d", i+1);
\r
1727 x[np++] = sptree.nodes[i].age;
\r
1729 for(i=0; i<g; i++) {
\r
1730 if(firsttime && fout) {
\r
1731 if(g>1) fprintf(fout, "\tmu%d", i+1);
\r
1732 else fprintf(fout, "\tmu");
\r
1734 x[np++] = data.rgene[i];
\r
1737 for(i=0; i<g; i++) {
\r
1738 if(firsttime && fout) {
\r
1739 if(g>1) fprintf(fout, "\tsigma2_%d", i+1);
\r
1740 else fprintf(fout, "\tsigma2");
\r
1742 x[np++] = data.sigma2[i];
\r
1745 for(i=0; i<g; i++) {
\r
1746 for(j=0; j<sptree.nnode; j++) {
\r
1747 if(j==sptree.root) continue;
\r
1748 if(firsttime && fout) {
\r
1749 if(g>1) fprintf(fout, "\tr_g%d_n%d", i+1, j+1);
\r
1750 else fprintf(fout, "\tr_n%d", j+1);
\r
1752 x[np++] = sptree.nodes[j].rates[i];
\r
1756 if(mcmc.usedata==1) {
\r
1757 if(!com.fix_kappa)
\r
1758 for(i=0; i<g; i++) {
\r
1759 if(firsttime && fout) {
\r
1760 if(g>1) fprintf(fout, "\tkappa_%d", i+1);
\r
1761 else fprintf(fout, "\tkappa");
\r
1763 x[np++] = data.kappa[i];
\r
1766 if(!com.fix_alpha)
\r
1767 for(i=0; i<g; i++) {
\r
1768 if(firsttime && fout) {
\r
1769 if(g>1) fprintf(fout, "\talpha_%d", i+1);
\r
1770 else fprintf(fout, "\talpha");
\r
1772 x[np++] = data.alpha[i];
\r
1775 if(data.pfossilerror[0]) {
\r
1776 if(firsttime && fout) fprintf(fout, "\tpFossilErr");
\r
1777 x[np++] = data.Pfossilerr;
\r
1781 printf("np in collectx is %d != %d\n", np, com.np);
\r
1782 if(!mcmc.usedata && (com.model || com.alpha)) printf("\nUse JC69 for no data");
\r
1785 if(firsttime && fout && mcmc.usedata) fprintf(fout, "\tlnL");
\r
1786 if(firsttime && fout) fprintf(fout, "\n");
\r
1793 double lnpriorTimesBDS_Approach1 (void)
\r
1795 /* Approach 1 or Theorem #.# in Stadler & Yang (2013 Syst Biol). Based on Tanja's notes of 27 July 2011.
\r
1796 This calculates g(z*), which is constant throughout the MCMC, and thus wastes time.
\r
1799 double lambda=data.BDS[0], mu=data.BDS[1], rho=data.BDS[2], psi=data.BDS[3];
\r
1800 double lnp=0, t, t1=sptree.nodes[sptree.root].age, gt, gt1, a, e, c1, c2;
\r
1801 double zstar, z0, z1;
\r
1803 if(com.ndata>1 && com.TipDate)
\r
1804 error2("don't know how ndata works with TipDate.");
\r
1805 if(lambda<=0 || mu<0 || (rho<=0 && psi<=0))
\r
1806 error2("B-D-S parameters:\n lambda > 0, mu >= 0, either rho > 0 or psi > 0");
\r
1808 /* if(psi==0) the density is the same as in YR1997 */
\r
1809 if(psi==0 && fabs(lambda-mu)<1e-20) {
\r
1810 c1 = 1/t1 + rho*lambda;
\r
1811 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
1812 if(j != sptree.root) {
\r
1813 c2 = 1 + rho*lambda*sptree.nodes[j].age;
\r
1814 lnp += log(c1/(c2*c2));
\r
1817 else if(psi==0 && fabs(lambda-mu)>1e-20) {
\r
1818 a = lambda - rho*lambda - mu;
\r
1819 e = exp((mu - lambda)*t1);
\r
1820 c1 = (rho*lambda + a*e)/(1 - e);
\r
1821 if(fabs(1-e) < 1E-100)
\r
1822 printf("e too close to 1..");
\r
1823 /* a & c1 are constant over loop */
\r
1824 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
1825 if(j!=sptree.root) {
\r
1826 e = exp((mu - lambda)*sptree.nodes[j].age);
\r
1827 c2 = (lambda-mu)/(rho*lambda + a*e);
\r
1829 if(c2 < 1E-300 || c2>1E300)
\r
1830 printf("c2 not numeric.");
\r
1835 c1 = sqrt(square(lambda - mu - psi) + 4*lambda*psi);
\r
1836 c2 = -(lambda - mu - 2*lambda*rho - psi)/c1;
\r
1837 gt1 = 1/( exp(-c1*t1)*(1-c2) + (1+c2) );
\r
1838 if(gt1<-1E-300 || gt1>1E300)
\r
1839 printf("gt1 not numeric.");
\r
1841 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
1842 if(j==sptree.root) continue;
\r
1843 for(k=sptree.nodes[j].sons[0]; k>=sptree.nspecies; k=sptree.nodes[k].sons[1]) ;
\r
1844 z0 = sptree.nodes[k].age;
\r
1845 /* printf("node %2d z[%d] = %7.4f ", j+1, k+1, z0); */
\r
1847 for(k=sptree.nodes[j].sons[1]; k>=sptree.nspecies; k=sptree.nodes[k].sons[0]) ;
\r
1848 z1 = sptree.nodes[k].age;
\r
1849 zstar = max2(z0, z1);
\r
1850 /* printf("z[%d] = %7.4f -> %7.4f\n", k+1, z1, zstar); */
\r
1852 zstar = 1/( exp(-c1*zstar)*(1-c2) + (1+c2) );
\r
1854 t = sptree.nodes[j].age;
\r
1855 gt = exp(-c1*t)*(1-c2) + (1+c2);
\r
1856 lnp += -c1*t + log( c1*(1-c2) / (gt*gt*(gt1 - zstar)) );
\r
1861 if(sptree.nodes[sptree.nspecies].fossil != BOUND_F)
\r
1862 error2("Only bounds for the root age are implemented.");
\r
1863 lnp += lnptCalibrationDensity(t1, sptree.nodes[sptree.nspecies].fossil, sptree.nodes[sptree.nspecies].pfossil);
\r
1868 double logqtp0_BDS_Approach2 (double t, double lambda, double mu, double rho, double psi, double *p0t)
\r
1870 /* This calculates p0t and log(qt) for Approach 2 of Stadler & Yang (2013 Syst Biol).
\r
1872 double c1, c2, e=0, r, logqt;
\r
1874 if(psi==0 && fabs(lambda-mu)<=1e-20) {
\r
1875 r = 1 + rho*lambda*t;
\r
1876 *p0t = 1 - rho/(1 + rho*lambda*t);
\r
1877 logqt = log(4*r*r);
\r
1879 else if(psi==0 && fabs(lambda-mu)>1e-20) {
\r
1880 e = exp((mu - lambda)*t);
\r
1881 r = (rho*lambda + (lambda-rho*lambda-mu)*e) / (lambda-mu);
\r
1882 *p0t = 1 - rho*(lambda - mu)/(rho*lambda + (lambda-rho*lambda-mu)*e);
\r
1883 logqt = log(4*r*r) - (mu - lambda)*t;
\r
1886 c1 = sqrt(square(lambda - mu - psi) + 4*lambda*psi);
\r
1887 if(c1==0) error2("c1==0");
\r
1888 c2 = -(lambda - mu - 2*lambda*rho - psi)/c1;
\r
1890 r = (e*(1-c2) - (1+c2)) / (e*(1-c2) + (1+c2));
\r
1892 *p0t = (lambda + mu + psi + c1*r) / (2*lambda);
\r
1894 logqt += log( e*e*square(1-c2) + e*2*(1-c2*c2) + square(1+c2) );
\r
1898 printf("lmrp %8.4f %8.4f %7.4f %7.4f t=%7.4f p0 logqt= %7.4f %7.4e\n", lambda, mu, rho, psi, t, *p0t, logqt);
\r
1903 double lnpriorTimesTipDate_Approach2 (void)
\r
1905 /* Approach 2 of Stadler & Yang (2013 Syst Biol). The BDS parameters are assumed to be fixed.
\r
1906 This ignores terms involving y, which are constants when the BDSS parameters are fixed.
\r
1909 int i, m=0, n=sptree.nspecies, k=0;
\r
1910 double lambda=data.BDS[0], mu=data.BDS[1], rho=data.BDS[2], psi=data.BDS[3];
\r
1911 double lnp=0, t1=sptree.nodes[sptree.root].age, p0, p1, logqt, logqt1, p0t1, z, e;
\r
1913 if(com.ndata>1 && com.TipDate)
\r
1914 error2("don't know how ndata works with TipDate.");
\r
1917 m = sptree.nspecies; n = 0;
\r
1919 else if(com.TipDate) {
\r
1920 for(i=0,m=0; i<sptree.nspecies; i++)
\r
1921 m += (sptree.nodes[i].age > 0);
\r
1922 n = sptree.nspecies - m;
\r
1925 if(lambda<=0 || mu<0 || (rho<=0 && psi<=0))
\r
1926 error2("wrong B-D-S parameters");
\r
1928 error2("we want more than 2 extant sampled species");
\r
1930 /* the numerator f(x, z | L(tmrca) = 2). */
\r
1931 logqt1 = logqtp0_BDS_Approach2(t1, lambda, mu, rho, psi, &p0t1);
\r
1933 for(i=sptree.nspecies; i<sptree.nnode; i++) { /* loop over n+m-1 internal node ages except t1 */
\r
1934 if(i == sptree.root) continue;
\r
1935 logqt = logqtp0_BDS_Approach2(sptree.nodes[i].age, lambda, mu, rho, psi, &p0);
\r
1939 /* the denominator, h(tmrca, n) */
\r
1941 if(fabs(lambda-mu-psi) > 1e-20) {
\r
1942 e = exp(-(lambda - mu - psi)*t1); /* this causes overflow for large t */
\r
1943 z = lambda*rho + (lambda - lambda*rho - mu - psi)*e;
\r
1944 p1 = rho*square(lambda - mu - psi)*e/(z*z);
\r
1946 printf("lnpriorTimesTipDate: p1 = %.6f <=0 (t1 = %9.5g)\n", p1, t1);
\r
1947 lnp -= log(p1*p1);
\r
1949 z = rho*lambda*(1 - e)/z;
\r
1951 printf("z = %.4f < 0\n", z);
\r
1952 lnp -= (n - 2)*log(z);
\r
1956 if(n<=1) error2("we don't like n <= 1");
\r
1957 if(n>2) lnp -= (n - 2)*log(t1);
\r
1958 lnp += (n + 2)*log(1 + rho*lambda*t1);
\r
1961 else { /* rho = 0 for viruses */
\r
1962 lnp -= 2*log(1 - p0t1);
\r
1966 if(sptree.nodes[sptree.nspecies].fossil != BOUND_F)
\r
1967 error2("Only bounds for the root age are implemented.");
\r
1968 lnp += lnptCalibrationDensity(t1, sptree.nodes[sptree.nspecies].fossil, sptree.nodes[sptree.nspecies].pfossil);
\r
1973 double p01t_BDSEquation6Stadler2010 (double t, double lambda, double mu, double rho, double psi, double *p0t)
\r
1975 /* This calculates p0t and p1t, defined in equations 1 & 2 in Stadler (2010).
\r
1977 double c1, c2, e, r, p1t;
\r
1979 if(fabs(lambda-mu)<1e-20 && psi==0) {
\r
1980 r = 1/(1/rho + lambda*t);
\r
1984 else if(fabs(lambda-mu)>1e-20 && psi==0) {
\r
1985 e = exp((mu - lambda)*t);
\r
1986 r = rho*(lambda-mu) / (rho*lambda + (lambda-mu-rho*lambda)*e);
\r
1991 c1 = sqrt(square(lambda - mu - psi) + 4*lambda*psi);
\r
1992 if(c1==0) error2("c1==0");
\r
1993 c2 = -(lambda - mu - 2*lambda*rho - psi)/c1;
\r
1995 r = (e*(1-c2) - (1+c2)) / (e*(1-c2) + (1+c2));
\r
1997 *p0t = (lambda + mu + psi + c1*r) / (2*lambda);
\r
1998 p1t = 4*rho/( 2*(1-c2*c2) + e*square(1-c2) + square(1+c2)/e );
\r
2001 /* printf("lmrp %8.6f %8.6f %7.4f %7.4f t=%7.4f p01= %7.4f %7.4f\n", lambda, mu, rho, psi, t, *p0t, p1t); */
\r
2006 double lnpriorTimesTipDateEquation6Stadler2010 (void)
\r
2008 /* Equation 6 in Stadler T. 2010. Sampling-through-time in birth-death trees.
\r
2009 J Theor Biol 267:396-404.
\r
2013 double lambda=data.BDS[0], mu=data.BDS[1], rho=data.BDS[2], psi=data.BDS[3];
\r
2014 double lnp=0, t, t1=sptree.nodes[sptree.root].age, p0, p1, z, e;
\r
2015 double a, b, tailL, tailR, thetaL, thetaR;
\r
2017 if(com.ndata>1 && com.TipDate)
\r
2018 error2("don't know how ndata works with TipDate.");
\r
2020 if(lambda<=0 || mu<=0 || (rho<=0 && psi<=0))
\r
2021 error2("wrong B-D-S parameters..");
\r
2022 for(i=0,m=0; i<sptree.nspecies; i++)
\r
2023 m += (sptree.nodes[i].age > 0);
\r
2024 n = sptree.nspecies - m;
\r
2027 if(fabs(lambda-mu)<1e-20)
\r
2028 z = 1 + 1/(rho*lambda*t1);
\r
2030 e = exp((mu-lambda)*t1);
\r
2031 z = (lambda*rho + (lambda-mu-lambda*rho)*e) / (rho*lambda*(1-e));
\r
2033 lnp = (n-2)*log(z*z);
\r
2036 p1 = p01t_BDSEquation6Stadler2010(t1, lambda, mu, rho, 0, &p0);
\r
2037 lnp -= log((n-1)*p1*p1);
\r
2039 /* this term is constant when the BDS parameters are fixed.
\r
2040 lnp += (n+m-2)*log(lambda) + (k+m)*log(psi);
\r
2043 p1 = p01t_BDSEquation6Stadler2010(t1, lambda, mu, rho, psi, &p0);
\r
2045 for(i=sptree.nspecies; i<sptree.nnode; i++) /* loop over n+m-1 internal node ages except t1 */
\r
2046 if(i != sptree.root)
\r
2047 lnp += log( p01t_BDSEquation6Stadler2010(sptree.nodes[i].age, lambda, mu, rho, psi, &p0) );
\r
2049 /* the y terms do not change when the BDS parameters are fixed.
\r
2050 for(i=0; i<sptree.nspecies; i++) {
\r
2051 if( (t=sptree.nodes[i].age) ) {
\r
2052 p1 = p01t_BDSEquation6Stadler2010(t, lambda, mu, rho, psi, &p0);
\r
2053 lnp += log(p0/p1);
\r
2059 if(sptree.nodes[sptree.nspecies].fossil != BOUND_F)
\r
2060 error2("Only bounds for the root age are implemented.");
\r
2061 lnp += lnptCalibrationDensity(t1, sptree.nodes[sptree.nspecies].fossil, sptree.nodes[sptree.nspecies].pfossil);
\r
2066 #define P0t_YR07(expmlt) (rho*(lambda-mu)/(rho*lambda +(lambda*(1-rho)-mu)*expmlt))
\r
2068 double lnPDFkernelBDS_YR07 (double t, double t1, double vt1, double lambda, double mu, double rho)
\r
2070 /* this calculates the log of the pdf of the BDS kernel.
\r
2072 double lnp, mlt=(mu-lambda)*t, expmlt, small=1e-20;
\r
2074 if(fabs(mu-lambda)<small)
\r
2075 lnp = log( (1 + rho*lambda*t1)/(t1*square(1 + rho*lambda*t)) );
\r
2077 expmlt = exp(mlt);
\r
2078 lnp = P0t_YR07(expmlt);
\r
2079 if(lnp<0) puts("this should not be negative, in lnPDFkernelBDS_YR07");
\r
2080 lnp = log( lnp*lnp * lambda/(vt1*rho) * expmlt );
\r
2085 double CDFkernelBDS_YR07 (double t, double t1, double vt1, double lambda, double mu, double rho)
\r
2087 /* this calculates the CDF for the kernel density from the BDS model
\r
2089 double cdf, expmlt, small=1e-20;
\r
2091 if(fabs(lambda - mu)<small)
\r
2092 cdf = (1 + rho*lambda*t1)*t/(t1*(1 + rho*lambda*t));
\r
2094 expmlt = exp((mu - lambda)*t);
\r
2096 cdf = rho*lambda/vt1 * (1 - expmlt)/(rho*lambda + (lambda*(1 - rho) - mu)*expmlt);
\r
2098 expmlt = 1/expmlt;
\r
2099 cdf = rho*lambda/vt1 * (expmlt - 1)/(rho*lambda*expmlt +(lambda*(1 - rho) - mu));
\r
2106 double lnPDFkernelBeta (double t, double t1, double p, double q)
\r
2108 /* This calculates the PDF for the beta kernel.
\r
2109 The normalizing constant is calculated outside this routine.
\r
2111 double lnp, x=t/t1;
\r
2113 if(x<0 || x>1) error2("t outside of range (0, t1)");
\r
2114 lnp = (p - 1)*log(x) + (q - 1)*log(1 - x);
\r
2120 double lnptNCgiventC (void)
\r
2122 /* This calculates f_BDS(tNC|tC), the conditional probability of no-calibration
\r
2123 node ages given calibration node ages, that is, the first term in equation 3
\r
2124 in Yang & Rannala (2006). This is called by lnpriorTimes().
\r
2126 The beta kernel is added on 5 October 2009, specified by data.priortime=1.
\r
2128 The routine uses sptree.nodes[].pfossil, sptree.nodes[].fossil,
\r
2129 and lambda, mu, rho from the birth-death process with species sampling
\r
2131 It sorts the node ages in the species tree and then uses the
\r
2132 birth-death prior conditional on the calibration points.
\r
2133 t[0] is t1 in Yang & Rannala (2006).
\r
2135 rankt[3]=1 means that the 3rd yougest age is node number ns+1. This is set up
\r
2136 for the calibration nodes only, = -1 for non-calibration nodes. First we collect
\r
2137 all times into t[] and sort them. Next we collect all calibration times into tc[]
\r
2138 and sort them. Third, we find the ranks of calibration times in tc[], that is,
\r
2139 i1, i2, ..., ic in YB06.
\r
2141 The term for root is not in this routine. The root is excluded from the ranking.
\r
2143 int i,j,k, n1=sptree.nspecies-1, rankprev, nfossil=0;
\r
2144 int ranktc[MaxNFossils]; /* ranks of calibration nodes */
\r
2145 double t[NS-1], tc[MaxNFossils], t1=sptree.nodes[sptree.root].age;
\r
2146 double lnpt=0, expmlt=1, vt1, P0t1, cdf, cdfprev, small=1e-20;
\r
2147 double lambda=data.BDS[0], mu=data.BDS[1], rho=data.BDS[2];
\r
2148 double p=data.BDS[0], q=data.BDS[1], lnbeta=0;
\r
2151 /* (A) Calculate f(tNC), joint of the non-calibration node ages.
\r
2152 The root age is excluded, so the loop starts from j = 1.
\r
2153 In the calculation of (eq.9)/(eq.11), (s - 2)! cancels out, as does
\r
2154 the densities g(t_i_1) etc. in eq.11. After cancellation, only the
\r
2155 densities of the non-calibration node ages remain in eq.9.
\r
2157 if(data.priortime==0) { /* BDS kernel */
\r
2158 if(rho<0) error2("rho < 0");
\r
2159 /* vt1 is needed only if (lambda != mu) */
\r
2160 if(fabs(lambda-mu)>small) {
\r
2161 expmlt= exp((mu - lambda)*t1);
\r
2162 P0t1 = P0t_YR07(expmlt);
\r
2163 vt1 = 1 - P0t1/rho*expmlt;
\r
2166 P0t1 = rho/(1+rho*mu*t1);
\r
2170 for(j=1,lnpt=0; j<n1; j++) {
\r
2171 k = sptree.nspecies+j;
\r
2172 if(!sptree.nodes[k].usefossil) /* non-calibration node age */
\r
2173 lnpt += lnPDFkernelBDS_YR07(sptree.nodes[k].age, t1, vt1, lambda, mu, rho);
\r
2176 else { /* beta kernel */
\r
2177 lnbeta = LnGamma(p) + LnGamma(q) - LnGamma(p+q);
\r
2178 for(j=1,lnpt=0; j<n1; j++) {
\r
2179 k = sptree.nspecies+j;
\r
2180 if(!sptree.nodes[k].usefossil) /* non-calibration node age */
\r
2181 lnpt += lnPDFkernelBeta(sptree.nodes[k].age, t1, p, q) + lnbeta;
\r
2185 /* (B) Divide by f(tC), marginal of calibration node ages (eq.9/eq.11).
\r
2186 This goes through the calibration nodes in the order of their ages,
\r
2187 with sorted node ages stored in tc[].
\r
2189 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
2190 t[j-sptree.nspecies] = sptree.nodes[j].age;
\r
2191 if(j!=sptree.root && sptree.nodes[j].usefossil)
\r
2192 tc[nfossil++] = sptree.nodes[j].age;
\r
2194 if(nfossil>MaxNFossils) error2("raise MaxNFossils?");
\r
2197 /* The only reason for sorting t[] is to construct ranktc[]. */
\r
2198 qsort(t, (size_t)n1, sizeof(double), comparedouble);
\r
2199 qsort(tc, (size_t)nfossil, sizeof(double), comparedouble);
\r
2201 for(i=j=0; i<nfossil; i++) {
\r
2202 if(i) j = ranktc[i-1]+1;
\r
2204 if(tc[i]<t[j]) break;
\r
2208 matout2(F0, t, 1, n1, 9, 5);
\r
2209 matout2(F0, tc, 1, nfossil, 9, 5);
\r
2210 for(i=0; i<nfossil; i++)
\r
2211 printf("%9d", ranktc[i]);
\r
2216 for(i=0,rankprev=0,cdfprev=0; i<nfossil+1; i++) {
\r
2218 if(data.priortime==0) /* BDS kernel */
\r
2219 cdf = CDFkernelBDS_YR07(tc[i], t1, vt1, lambda, mu, rho);
\r
2220 else /* beta kernel */
\r
2221 cdf = CDFBeta(tc[i]/t1, p, q, lnbeta);
\r
2223 k = ranktc[i] - rankprev - 1;
\r
2227 k = n1 - rankprev - 1;
\r
2230 if(cdf <= cdfprev) error2("cdf diff<0 in lnptNCgiventC");
\r
2231 lnpt += LnGamma(k+1.0) - k*log(cdf - cdfprev);
\r
2233 rankprev = ranktc[i];
\r
2236 if(debug==1) printf("\npdf = %.12f\n", exp(lnpt));
\r
2241 double t1=sptree.nodes[4].age, t2=sptree.nodes[5].age, t3=sptree.nodes[6].age, Gt2, Gt3;
\r
2242 Gt2 = CDFkernelBDS_YR07(t2, t1, vt1, lambda, mu, rho);
\r
2243 Gt3 = CDFkernelBDS_YR07(t3, t1, vt1, lambda, mu, rho);
\r
2244 if(sptree.nodes[5].usefossil==0 && sptree.nodes[6].usefossil==0)
\r
2245 lnpt += log(1.0/2);
\r
2246 if(sptree.nodes[5].usefossil==1 && sptree.nodes[6].usefossil==0)
\r
2247 if(t3<t2) lnpt += log(Gt2);
\r
2248 else lnpt += log(1-Gt2);
\r
2249 if(sptree.nodes[5].usefossil==0 && sptree.nodes[6].usefossil==1)
\r
2250 if(t2<t3) lnpt += log(Gt3);
\r
2251 else lnpt += log(1-Gt3);
\r
2259 double lnptCalibrationDensity(double t, int fossil, double p[7])
\r
2261 /* This calculates the log of calibration density
\r
2263 double lnpt, a=p[0], b=p[1], thetaL, thetaR, tailL=99, tailR=99, s,z, t0,P,c,A;
\r
2266 case (LOWER_F): /* truncated Cauchy C(a(1+p), s^2). tL = a. */
\r
2272 A = 0.5 + 1/Pi*atan(P/c);
\r
2275 lnpt = log((1-tailL)/(Pi*A*s*(1 + z*z)));
\r
2279 thetaL = (1/tailL-1) / (Pi*A*c*(1 + z*z));
\r
2280 lnpt = log(tailL*thetaL/a) + (thetaL-1)*log(t/a);
\r
2284 /* equation (16) in Yang and Rannala (2006) */
\r
2287 lnpt = log((1-tailR)/b);
\r
2289 thetaR = (1-tailR)/(tailR*b);
\r
2290 lnpt = log(tailR*thetaR) - thetaR*(t-b);
\r
2297 lnpt = log((1-tailL-tailR)/(b-a));
\r
2299 thetaL = (1-tailL-tailR)*a/(tailL*(b-a));
\r
2300 lnpt = log(tailL*thetaL/a) + (thetaL-1)*log(t/a);
\r
2303 thetaR = (1-tailL-tailR)/(tailR*(b-a));
\r
2304 lnpt = log(tailR*thetaR) - thetaR*(t-b);
\r
2308 lnpt = a*log(b)-b*t+(a-1)*log(t)-LnGamma(a);
\r
2311 lnpt = logPDFSkewN(t, p[0], p[1], p[2]);
\r
2314 lnpt = log(PDFSkewT(t, p[0], p[1], p[2], p[3]));
\r
2317 a = PDFSkewN(t, p[1], p[2], p[3]);
\r
2318 b = PDFSkewN(t, p[4], p[5], p[6]);
\r
2319 lnpt = log(p[0]*a + b);
\r
2326 double lnptC (void)
\r
2328 /* This calculates the prior density of times at calibration nodes as specified
\r
2329 by the fossil calibration information, the second term in equation 3 in
\r
2330 Yang & Rannala (2006).
\r
2333 The term for root is always calculated in this routine.
\r
2334 If there is a fossil at root and if it is a lower bound, it is re-set to a
\r
2337 int i,j, nfossil=0, fossil;
\r
2338 double p[7], t, lnpt=0;
\r
2341 /* RootAge[] will cause sptree.nodes[sptree.root].fossil to be set before this routine. */
\r
2342 if(sptree.nfossil==0 && sptree.nodes[sptree.root].fossil==0)
\r
2345 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
2346 if(j!=sptree.root && !sptree.nodes[j].usefossil)
\r
2349 t = sptree.nodes[j].age;
\r
2350 fossil = sptree.nodes[j].fossil;
\r
2351 for(i=0; i<7; i++) p[i] = sptree.nodes[j].pfossil[i];
\r
2353 if(j!=sptree.root || (sptree.nodes[j].usefossil && fossil!=LOWER_F)) {
\r
2354 if(fossil==LOWER_F)
\r
2355 p[3] = sptree.nodes[j].pfossil[3];
\r
2356 else if(fossil==UPPER_F)
\r
2357 p[2] = sptree.nodes[j].pfossil[2];
\r
2358 else if(fossil==BOUND_F) {
\r
2359 p[2] = sptree.nodes[j].pfossil[2];
\r
2360 p[3] = sptree.nodes[j].pfossil[3];
\r
2363 else if(!sptree.nodes[j].usefossil) { /* root fossil absent or deleted, using RootAge */
\r
2364 p[0] = sptree.RootAge[0];
\r
2365 p[1] = sptree.RootAge[1];
\r
2366 if(sptree.RootAge[0]>0) {
\r
2368 p[2] = sptree.RootAge[2];
\r
2369 p[3] = sptree.RootAge[3];
\r
2373 p[2] = sptree.RootAge[2];
\r
2376 else if (fossil==LOWER_F) {
\r
2377 /* root fossil is L. B(a,b,tailL, tailR) is used. p & c ignored. */
\r
2379 p[1] = sptree.RootAge[1];
\r
2380 p[3] = sptree.nodes[j].pfossil[3];
\r
2381 p[2] = (sptree.RootAge[0]>0 ? sptree.RootAge[3] : sptree.RootAge[2]);
\r
2384 lnpt += lnptCalibrationDensity(t, fossil, p);
\r
2390 double getScaleFossilCombination (void);
\r
2392 double getScaleFossilCombination (void)
\r
2394 /* This uses Monte Carlo integration to calculate the normalizing constant for the
\r
2395 joint prior on fossil times, when some fossils are assumed to be in error and not
\r
2396 used. The constant is the integral of the density over the feasible region, where
\r
2397 the times satisfy the age constraints on the tree.
\r
2398 It is assumed that the ancestral nodes have smaller node numbers than descendent
\r
2399 nodes, as is the case if the node numbers are assigned by ReadTreeN().
\r
2400 nfs = nfossilused if a fossil is used for the root or nfs = nfossilused + 1
\r
2402 CLower[] contains constants for lower bounds, to avoid duplicated computation.
\r
2404 int nrepl=5000000, ir, i,j,k, nfs,ifs[MaxNFossils]={0}, feasible;
\r
2405 int root=sptree.root, rootfossil = sptree.nodes[root].fossil, fossil;
\r
2406 signed char ConstraintTab[MaxNFossils][MaxNFossils]={{0}}; /* 1: row>col; 0: irrelevant */
\r
2407 double C, sC, accept, mt[MaxNFossils]={0}, t[MaxNFossils], *pfossil[MaxNFossils], pfossilroot[4];
\r
2408 double importance, CLower[MaxNFossils][3];
\r
2409 double sNormal=1, r, thetaL, thetaR, a, b, c,p,s,A,zc,zn, tailL,tailR;
\r
2412 /* Set up constraint table */
\r
2413 for(i=sptree.nspecies,nfs=0; i<sptree.nnode; i++) {
\r
2414 if(i==root || sptree.nodes[i].usefossil)
\r
2417 for(i=1; i<nfs; i++) {
\r
2418 for(j=0; j<i; j++) {
\r
2419 for(k=ifs[i]; k!=-1; k=sptree.nodes[k].father) {
\r
2420 if(k == ifs[j]) { ConstraintTab[i][j] = 1; break; }
\r
2425 for(i=0; i<nfs; i++) {
\r
2426 printf("\n%d (%2d %s): ", i+1, ifs[i]+1, fossils[sptree.nodes[ifs[i]].fossil]);
\r
2427 for(j=0; j<i; j++)
\r
2428 printf("%4d", ConstraintTab[i][j]);
\r
2433 /* Set up calibration info. Save constants for lower bounds */
\r
2434 for(i=0; i<4; i++)
\r
2435 pfossilroot[i] = sptree.nodes[root].pfossil[i];
\r
2436 if(ifs[0] == root) { /* copy info for root into pfossilroot[] */
\r
2437 if(!sptree.nodes[root].usefossil) { /* root fossil absent or excluded */
\r
2438 for(i=0; i<4; i++)
\r
2439 pfossilroot[i] = sptree.RootAge[i];
\r
2440 rootfossil = (sptree.RootAge[0]>0 ? BOUND_F : UPPER_F);
\r
2442 else if (sptree.nodes[root].fossil==LOWER_F) { /* root fossil is lower bound */
\r
2443 pfossilroot[1] = sptree.RootAge[1];
\r
2444 rootfossil = BOUND_F;
\r
2447 for(i=0; i<nfs; i++) {
\r
2449 pfossil[i] = (j==root ? pfossilroot : sptree.nodes[j].pfossil);
\r
2451 if(j!=root && sptree.nodes[j].fossil==LOWER_F) {
\r
2452 a = pfossil[i][0];
\r
2453 p = pfossil[i][1];
\r
2454 c = pfossil[i][2];
\r
2455 tailL = pfossil[i][3];
\r
2457 A = 0.5 + 1/Pi*atan(p/c);
\r
2458 CLower[i][0] = (1/tailL-1) / (Pi*A*c*(1 + (p/c)*(p/c))); /* thetaCauchy */
\r
2459 CLower[i][1] = (1/tailL-1) * a/(s*sqrt(Pi/2)); /* thetaNormal */
\r
2460 CLower[i][2] = s/(A*c*a*sqrt(2*Pi)); /* s/Act*sqrt(2Pi) */
\r
2464 /* Take samples */
\r
2465 for(ir=0,C=sC=accept=0; ir<nrepl; ir++) {
\r
2466 for(i=0,importance=1; i<nfs; i++) {
\r
2468 fossil = (j==root ? rootfossil : sptree.nodes[j].fossil);
\r
2469 a = pfossil[i][0];
\r
2470 b = pfossil[i][1];
\r
2473 case(LOWER_F): /* simulating from folded normal, the importance density */
\r
2474 tailL = pfossil[i][3];
\r
2475 if (r < tailL) { /* left tail, CLower[i][1] has thetaNormal */
\r
2476 thetaL = CLower[i][1];
\r
2477 t[i] = a * pow(rndu(), 1/thetaL);
\r
2478 importance *= CLower[i][0]/thetaL * pow(t[i]/a, CLower[i][0]-thetaL);
\r
2481 c = pfossil[i][2];
\r
2483 t[i] = a + fabs(rndNormal()) * s;
\r
2484 zc = (t[i] - a*(1+b))/(c*a);
\r
2485 zn = (t[i] - a)/s;
\r
2486 importance *= CLower[i][2] * exp(zn*zn/2) / (1+zc*zc);
\r
2490 tailR = pfossil[i][2];
\r
2491 if(r > tailR) { /* flat part */
\r
2494 else { /* right tail */
\r
2495 thetaR = (1-tailR)/(tailR*b);
\r
2496 t[i] = b - log(rndu())/thetaR;
\r
2500 tailL = pfossil[i][2];
\r
2501 tailR = pfossil[i][3];
\r
2502 if(r > tailL + tailR) /* flat part */
\r
2503 t[i] = a + (b - a) * rndu();
\r
2504 else if (r < tailL) { /* left tail */
\r
2505 thetaL = (1-tailL-tailR)*a/(tailL*(b-a));
\r
2506 t[i] = a * pow(rndu(), 1/thetaL);
\r
2508 else { /* right tail */
\r
2509 thetaR = (1-tailL-tailR)/(tailR*(b-a));
\r
2510 t[i] = b - log(rndu())/thetaR;
\r
2514 t[i] = rndgamma(a)/b;
\r
2517 printf("\nfossil = %d (%s) not implemented.\n", fossil, fossils[fossil]);
\r
2523 for(i=1; i<nfs; i++) {
\r
2524 for(j=0; j<i; j++)
\r
2525 if(ConstraintTab[i][j] && t[i]>t[j])
\r
2526 { feasible=0; break; }
\r
2527 if(feasible==0) break;
\r
2532 sC += importance*importance;
\r
2533 for(i=0; i<nfs; i++)
\r
2534 mt[i] += t[i]*importance;
\r
2536 if((ir+1)%100000 == 0) {
\r
2538 s = (sC/a - C/a*C/a) / a;
\r
2539 printf("\r%7d %.2f%% C = %.6f +- %.6f mt", ir+1, accept/(ir+1)*100, C/(ir+1), sqrt(s));
\r
2540 for(i=0; i<nfs; i++)
\r
2541 printf(" %6.4f", mt[i]/C);
\r
2548 int SetupPriorTimesFossilErrors (void)
\r
2550 /* This prepares for lnpriorTimes() under models of fossil errors. It calculates
\r
2551 the scaling factor for the probability density of times for a given combination
\r
2552 of the indicator variables, indicating which fossil is in error and not used.
\r
2553 nMinCorrect is the minimum number of correct fossils.
\r
2555 int nMinCorrect = (int)data.pfossilerror[2], ncomFerr, is,i, icom, nused=0, it;
\r
2559 if(data.pfossilerror[0]==0 || sptree.nfossil>MaxNFossils || sptree.nfossil<nMinCorrect)
\r
2560 error2("Fossil-error model is inappropriate.");
\r
2562 for (i=nMinCorrect,ncomFerr=0; i<=sptree.nfossil; i++) {
\r
2563 ncomFerr += (int)( Binomial((double)sptree.nfossil, i, &t) + .1 );
\r
2564 if(t!=0) error2("too many fossils to deal with? ");
\r
2567 data.CcomFossilErr = (double*)malloc(ncomFerr*sizeof(double));
\r
2568 if(data.CcomFossilErr == NULL) error2("oom for CcomFossilErr");
\r
2570 /* cycle through combinations of indicators. */
\r
2571 for (i=0,icom=0; i < (1<<sptree.nfossil); i++) {
\r
2573 for (is=sptree.nspecies, nused=0; is<sptree.nnode; is++) {
\r
2574 if(sptree.nodes[is].fossil) {
\r
2575 sptree.nodes[is].usefossil = 1 - it%2;
\r
2576 nused += sptree.nodes[is].usefossil;
\r
2578 if(debug) printf("%2d (%2d)", sptree.nodes[is].usefossil, is+1);
\r
2581 if(nused<nMinCorrect) continue;
\r
2582 if(debug) printf("\n\n********* Combination %2d/%2d: %2d fossils used", icom+1, ncomFerr, nused);
\r
2583 data.CcomFossilErr[icom++] = log( getScaleFossilCombination() );
\r
2590 double lnpriorTimes (void)
\r
2592 /* This calculates the prior density of node times in the master species tree.
\r
2593 Node ages are in sptree.nodes[].age.
\r
2595 int nMinCorrect = (int)data.pfossilerror[2];
\r
2596 int is, i,icom, nused, it;
\r
2597 double pE = data.Pfossilerr, lnpE, ln1pE, pEconst=0, lnC, lnNC;
\r
2598 double lnpt=0, scaleF=-1e300, y;
\r
2601 if(sptree.nfossil==0 || com.TipDate) {
\r
2602 if(1) lnpt = lnpriorTimesBDS_Approach1();
\r
2603 else if(0) lnpt = lnpriorTimesTipDate_Approach2();
\r
2604 else if(0) lnpt = lnpriorTimesTipDateEquation6Stadler2010();
\r
2606 else if(data.pfossilerror[0]==0) { /* no fossil errors in model */
\r
2608 lnNC = lnptNCgiventC();
\r
2609 lnpt = lnC + lnNC;
\r
2611 if(debug) printf("\nftC ftNC ft = %9.5f%9.5f%9.5f", exp(lnC), exp(lnNC), exp(lnC)*exp(lnNC));
\r
2613 else { /* fossil errors: cycle through combinations of indicators, using nMinCorrect. */
\r
2615 pEconst = y = pow(pE, (double)sptree.nfossil);
\r
2616 if(y<1e-300) error2("many fossils & small pE. Rewrite the code?");
\r
2617 for(i=1; i<nMinCorrect; i++) /* i is the number of correct fossils used */
\r
2618 pEconst += y *= (sptree.nfossil-i+1)*(1-pE)/(i*pE);
\r
2619 pEconst = log(1 - pEconst);
\r
2621 lnpE=log(pE); ln1pE=log(1-pE);
\r
2622 for(i=0,icom=0; i < (1<<sptree.nfossil); i++) { /* sum over U */
\r
2624 for(is=sptree.nspecies, nused=0; is<sptree.nnode; is++) {
\r
2625 if(sptree.nodes[is].fossil) {
\r
2626 sptree.nodes[is].usefossil = 1 - it%2;
\r
2627 nused += sptree.nodes[is].usefossil;
\r
2631 if(nused<nMinCorrect) continue;
\r
2633 lnNC = lnptNCgiventC();
\r
2635 y = nused*ln1pE + (sptree.nfossil-nused)*lnpE - pEconst - data.CcomFossilErr[icom++]
\r
2640 printf("\nU%d%d ftC ftNC ft = %9.5f%9.5f%9.5f", sptree.nodes[5].usefossil, sptree.nodes[6].usefossil,
\r
2641 exp(lnC), exp(lnNC), exp(lnC)*exp(lnNC));
\r
2643 if(y < scaleF + 100)
\r
2644 lnpt += exp(y-scaleF);
\r
2649 lnpt += exp(y-scaleF);
\r
2651 lnpt = scaleF + log(lnpt);
\r
2653 if(debug) exit(0);
\r
2658 int getPfossilerr (double postEFossil[], double nround)
\r
2660 /* This is modified from lnpriorTimes(), and prints out the conditonal Perror
\r
2661 for each fossil given the times and pE.
\r
2663 int nMinCorrect = (int)data.pfossilerror[2];
\r
2664 int is,i,icom, k, nf=sptree.nfossil, nused, it;
\r
2665 double pE = data.Pfossilerr, lnpE, ln1pE, pEconst=0;
\r
2666 double pt, pU[MaxNFossils]={0}, scaleF=-1e300, y;
\r
2667 char U[MaxNFossils]; /* indicators of fossil errors */
\r
2669 if(data.priortime==1)
\r
2670 error2("beta kernel for prior time not yet implemented for model of fossil errors.");
\r
2672 pEconst = y = pow(pE, (double)sptree.nfossil);
\r
2673 for(i=1; i<nMinCorrect; i++) /* i is the number of correct fossils used */
\r
2674 pEconst += y *= (sptree.nfossil-i+1)*(1-pE)/(i*pE);
\r
2675 pEconst = log(1 - pEconst);
\r
2678 lnpE=log(pE); ln1pE=log(1-pE);
\r
2679 for(i=0,icom=0,pt=0; i < (1<<sptree.nfossil); i++) { /* sum over U */
\r
2681 for(is=sptree.nspecies, k=0, nused=0; is<sptree.nnode; is++) {
\r
2682 if(sptree.nodes[is].fossil) {
\r
2683 sptree.nodes[is].usefossil = 1 - it%2;
\r
2684 nused += sptree.nodes[is].usefossil;
\r
2685 U[k++] = !sptree.nodes[is].usefossil;
\r
2689 if(nused<nMinCorrect) continue;
\r
2690 if(k != nf) error2("k != nf in getPfossilerr()?");
\r
2692 y = nused*ln1pE+(nf-nused)*lnpE - pEconst - data.CcomFossilErr[icom++] + lnptC() + lnptNCgiventC();
\r
2694 if(y < scaleF + 100) {
\r
2695 pt += y = exp(y-scaleF);
\r
2696 for(k=0; k<nf; k++)
\r
2697 if(U[k]) pU[k] += y;
\r
2699 else { /* change scaleF */
\r
2702 for(k=0; k<nf; k++) pU[k] = U[k]; /* 1 if U[k]=1 or 0 if U[k]=0 */
\r
2705 for(k=0; k<nf; k++) pU[k] /= pt;
\r
2706 for(k=0; k<nf; k++)
\r
2707 postEFossil[k] = (postEFossil[k]*(nround-1) + pU[k])/nround;
\r
2714 int LabelOldCondP (int spnode)
\r
2716 /* This sets com.oldconP[j]=0 if node j in the gene tree needs updating, after
\r
2717 either rates or times have changed for spnode in the species tree. This is to
\r
2718 avoid duplicated computation of conditional probabilities. The routine workes
\r
2719 on the current gene tree and accounts for the fact that some species may be
\r
2720 missing at some loci.
\r
2721 The routine first finds spnode or its first ancestor that is present in the
\r
2722 gene tree, then identifies that node in the genetree. This reveals the
\r
2723 oldest node j in the gene tree that is descendent of spnode. Node j and all
\r
2724 its ancestors in the gene tree need updating.
\r
2726 Before calling this routine, set com.oldconP[]=1. This routine changes some
\r
2727 com.oldconP[] into 0 but do not change any 0 to 1.
\r
2729 The gene tree is in nodes[], as UseLocus has been called prior to this.
\r
2730 This is called by UpdateTimes and UpdateRates.
\r
2734 if(j>=tree.nnode || j!=nodes[j].ipop) {
\r
2736 /* From among spnode and its ancestors in the species tree, find the
\r
2737 first node, i, that is in genetree.
\r
2739 for(i=spnode; i!=-1; i=sptree.nodes[i].father) {
\r
2741 /* Find that node in genetree that is node i in species tree.
\r
2742 Its descendent, node j, is the oldest node in gene tree that is
\r
2743 descendent of spnode.
\r
2745 for(j=0; j<tree.nnode && nodes[j].ipop!=i; j++) ;
\r
2747 if(j<tree.nnode) break;
\r
2752 for( ; ; j=nodes[j].father) {
\r
2753 com.oldconP[j] = 0;
\r
2754 if(j==tree.root) break;
\r
2761 double UpdateTimes (double *lnL, double finetune)
\r
2763 /* This updates the node ages in the master species tree sptree.nodes[].age,
\r
2765 int locus, is, i, *sons, dad;
\r
2766 double naccept=0, t,tnew, tson[2], yb[2], y,ynew;
\r
2767 double lnacceptance, lnLd, lnpDinew[NGENE], lnpTnew, lnpRnew=-1;
\r
2769 if(finetune<=0) error2("steplength = 0 in UpdateTimes");
\r
2770 for(is=sptree.nspecies; is<sptree.nnode; is++) {
\r
2771 t = sptree.nodes[is].age;
\r
2772 sons = sptree.nodes[is].sons;
\r
2773 dad = sptree.nodes[is].father;
\r
2774 tson[0] = sptree.nodes[sons[0]].age;
\r
2775 tson[1] = sptree.nodes[sons[1]].age;
\r
2776 y = max2(tson[0], tson[1]);
\r
2777 yb[0] = (y>0 ? log(y) : -99);
\r
2778 if(is != sptree.root) yb[1] = log(sptree.nodes[dad].age);
\r
2782 ynew = y + finetune*rndSymmetrical();
\r
2783 ynew = reflect(ynew, yb[0], yb[1]);
\r
2784 sptree.nodes[is].age = tnew = exp(ynew);
\r
2786 lnacceptance = ynew - y;
\r
2787 lnpTnew = lnpriorTimes();
\r
2788 lnacceptance += lnpTnew - data.lnpT;
\r
2789 if(com.clock==3) {
\r
2790 lnpRnew = lnpriorRates();
\r
2791 lnacceptance += lnpRnew - data.lnpR;
\r
2794 for(locus=0,lnLd=0; locus<data.ngene; locus++) {
\r
2795 UseLocus(locus, 1, mcmc.usedata, 0);
\r
2797 if(mcmc.saveconP) {
\r
2798 for(i=0;i<sptree.nnode;i++) com.oldconP[i]=1;
\r
2799 LabelOldCondP(is);
\r
2801 if(com.oldconP[tree.root])
\r
2802 lnpDinew[locus] = data.lnpDi[locus];
\r
2804 lnpDinew[locus] = lnpD_locus(locus);
\r
2805 lnLd += lnpDinew[locus] - data.lnpDi[locus];
\r
2807 lnacceptance += lnLd;
\r
2809 if(debug==2) printf("species %2d t %8.4f %8.4f %9.2f %9.2f", is, t, tnew, lnLd, lnacceptance);
\r
2811 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
2813 data.lnpT = lnpTnew;
\r
2814 if(com.clock==3) data.lnpR = lnpRnew;
\r
2815 for(locus=0; locus<data.ngene; locus++)
\r
2816 data.lnpDi[locus] = lnpDinew[locus];
\r
2818 if(mcmc.usedata==1) switchconPin();
\r
2819 if(debug==2) printf(" Y (%4d)\n", NPMat);
\r
2822 sptree.nodes[is].age = t;
\r
2823 if(debug==2) printf(" N (%4d)\n", NPMat);
\r
2824 for(locus=0; locus<data.ngene; locus++)
\r
2825 AcceptRejectLocus(locus, 0); /* reposition conP */
\r
2829 return(naccept/(sptree.nspecies-1.));
\r
2833 #if (0) /* this is not used now. */
\r
2835 double UpdateTimesClock23 (double *lnL, double finetune)
\r
2837 /* This updates the node ages in the master species tree sptree.nodes[].age,
\r
2838 one by one. It simultaneously changes the rates at the three branches
\r
2839 around the node so that the branch lengths remain the same, and there is
\r
2840 no need to update the lnL. Sliding window on the logarithm of ages.
\r
2842 int is, i, *sons, dad;
\r
2843 double naccept=0, tb[2],t,tnew, tson[2], yb[2],y,ynew, rateratio[3];
\r
2844 double lnacceptance, lnpTnew,lnpRnew;
\r
2846 if(debug==2) puts("\nUpdateTimesClock23 ");
\r
2847 if(finetune<=0) error2("steplength = 0 in UpdateTimesClock23");
\r
2849 for(is=sptree.nspecies; is<sptree.nnode; is++) {
\r
2850 t = sptree.nodes[is].age;
\r
2851 sons = sptree.nodes[is].sons;
\r
2852 tson[0] = sptree.nodes[sons[0]].age;
\r
2853 tson[1] = sptree.nodes[sons[1]].age;
\r
2854 dad = sptree.nodes[is].father;
\r
2855 tb[0] = max2(tson[0], tson[1]);
\r
2856 yb[0] = (tb[0]>0 ? log(tb[0]) : -99);
\r
2857 if(is != sptree.root) yb[1] = log(tb[1] = sptree.nodes[dad].age);
\r
2861 ynew = y + finetune*rndSymmetrical();
\r
2862 ynew = reflect(ynew, yb[0], yb[1]);
\r
2863 sptree.nodes[is].age = tnew = exp(ynew);
\r
2864 lnacceptance = ynew - y;
\r
2866 /* Thorne et al. (1998) equation 9. */
\r
2867 rateratio[0] = (t-tson[0])/(tnew-tson[0]);
\r
2868 rateratio[1] = (t-tson[1])/(tnew-tson[1]);
\r
2869 for(i=0; i<data.ngene; i++) {
\r
2870 sptree.nodes[sons[0]].rates[i] *= rateratio[0];
\r
2871 sptree.nodes[sons[1]].rates[i] *= rateratio[1];
\r
2873 lnacceptance += data.ngene*log(rateratio[0]*rateratio[1]);
\r
2874 if(is != sptree.root) {
\r
2875 rateratio[2] = (t-tb[1])/(tnew-tb[1]);
\r
2876 for(i=0; i<data.ngene; i++)
\r
2877 sptree.nodes[is].rates[i] *= rateratio[2];
\r
2878 lnacceptance += data.ngene*log(rateratio[2]);
\r
2881 lnpTnew = lnpriorTimes();
\r
2882 lnacceptance += lnpTnew - data.lnpT;
\r
2883 lnpRnew = lnpriorRates();
\r
2884 lnacceptance += lnpRnew - data.lnpR;
\r
2886 if(debug==2) printf("species %2d t %8.4f %8.4f", is,t,tnew);
\r
2888 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
2890 data.lnpT = lnpTnew;
\r
2891 data.lnpR = lnpRnew;
\r
2892 if(debug==2) printf(" Y (%4d)\n", NPMat);
\r
2895 sptree.nodes[is].age = t;
\r
2896 for(i=0; i<data.ngene; i++) {
\r
2897 sptree.nodes[sons[0]].rates[i] /= rateratio[0];
\r
2898 sptree.nodes[sons[1]].rates[i] /= rateratio[1];
\r
2899 if(is!=sptree.root) sptree.nodes[is].rates[i] /= rateratio[2];
\r
2901 if(debug==2) printf(" N (%4d)\n", NPMat);
\r
2904 return(naccept/(sptree.nspecies-1.));
\r
2911 void getSinvDetS (double space[])
\r
2913 /* This uses the variance-correlation matrix data.correl[g*g] to constructs
\r
2914 Sinv (inverse of S) and detS (|S|). It also copies the variances into
\r
2915 data.sigma2[g]. This is called every time data.correl is updated.
\r
2917 What restrictions should be placed on data.correl[]???
\r
2921 int i, j, g=data.ngene;
\r
2924 for(i=0; i<g; i++)
\r
2925 data.Sinv[i*g+i] = data.sigma2[i] = data.correl[i*g+i];
\r
2926 for(i=0; i<g; i++) {
\r
2927 for(j=0; j<i; j++)
\r
2928 data.Sinv[i*g+j] = data.Sinv[j*g+i]
\r
2929 = data.correl[i*g+j]*sqrt(data.sigma2[i]*data.sigma2[j]);
\r
2933 printf("\ncorrel & S & Sinv ");
\r
2934 matout(F0, data.correl, g, g);
\r
2935 matout(F0, data.Sinv, g, g);
\r
2938 matinv (data.Sinv, g, g, space);
\r
2939 data.detS = fabs(space[0]);
\r
2942 matout(F0, data.Sinv, g, g);
\r
2943 printf("|S| = %.6g\n", data.detS);
\r
2946 error2("detS < 0");
\r
2951 double lnpriorRates (void)
\r
2953 /* This calculates the log of the prior of rates under the two rate-drift models:
\r
2954 the independent rates (clock=2) and the geometric Brownian motion model (clock=3).
\r
2956 clock=2: the algorithm cycles through the branches, and add up the log
\r
2958 clock=3: the root rate is mu or data.rgene[]. The algorithm cycles through
\r
2959 the ancestral nodes and deals with the two daughter branches.
\r
2961 int i, inode, locus, dad=-1, g=data.ngene, s=sptree.nspecies, sons[2];
\r
2962 double lnpR=-log(2*Pi)/2.0*(2*s-2)*g, t,tA,t1,t2,Tinv[4], detT;
\r
2963 double zz, r=-1, rA,r1,r2, y1,y2;
\r
2966 if(com.clock==3 && data.priorrate==1)
\r
2967 error2("gamma prior for rates for clock3 not implemented yet.");
\r
2968 else if(com.clock==2 && data.priorrate==1) { /* clock2, gamma rate prior */
\r
2970 for(locus=0; locus<g; locus++) {
\r
2971 a = data.rgene[locus]*data.rgene[locus]/data.sigma2[locus];
\r
2972 b = data.rgene[locus]/data.sigma2[locus];
\r
2973 lnpR += (a*log(b) - LnGamma(a)) * (2*s-2);
\r
2974 for(inode=0; inode<sptree.nnode; inode++) {
\r
2975 if(inode==sptree.root) continue;
\r
2976 r = sptree.nodes[inode].rates[locus];
\r
2977 lnpR += -b*r + (a-1)*log(r);
\r
2981 else if(com.clock==2 && data.priorrate ==0) { /* clock2, LN rate prior */
\r
2982 for(locus=0; locus<g; locus++)
\r
2983 lnpR -= log(data.sigma2[locus])/2.*(2*s-2);
\r
2984 for(inode=0; inode<sptree.nnode; inode++) {
\r
2985 if(inode==sptree.root) continue;
\r
2986 for(locus=0; locus<g; locus++) {
\r
2987 r = sptree.nodes[inode].rates[locus];
\r
2988 zz = log(r/data.rgene[locus]) + data.sigma2[locus]/2;
\r
2989 lnpR += -zz*zz/(2*data.sigma2[locus]) - log(r);
\r
2993 else if(com.clock==3 && data.priorrate ==0) { /* clock3, LN rate prior */
\r
2994 for(inode=0; inode<sptree.nnode; inode++) {
\r
2995 if(sptree.nodes[inode].nson==0) continue; /* skip the tips */
\r
2996 dad = sptree.nodes[inode].father;
\r
2997 for(i=0; i<2; i++) sons[i] = sptree.nodes[inode].sons[i];
\r
2998 t = sptree.nodes[inode].age;
\r
2999 if(inode==sptree.root) tA = 0;
\r
3000 else tA = (sptree.nodes[dad].age - t)/2;
\r
3001 t1 = (t-sptree.nodes[sons[0]].age)/2;
\r
3002 t2 = (t-sptree.nodes[sons[1]].age)/2;
\r
3003 detT = t1*t2+tA*(t1+t2);
\r
3004 Tinv[0] = (tA+t2)/detT;
\r
3005 Tinv[1] = Tinv[2] = -tA/detT;
\r
3006 Tinv[3] = (tA+t1)/detT;
\r
3007 for(locus=0; locus<g; locus++) {
\r
3008 rA = (inode==sptree.root ? data.rgene[locus] : sptree.nodes[inode].rates[locus]);
\r
3009 r1 = sptree.nodes[sons[0]].rates[locus];
\r
3010 r2 = sptree.nodes[sons[1]].rates[locus];
\r
3011 y1 = log(r1/rA)+(tA+t1)*data.sigma2[locus]/2;
\r
3012 y2 = log(r2/rA)+(tA+t2)*data.sigma2[locus]/2;
\r
3013 zz = (y1*y1*Tinv[0]+2*y1*y2*Tinv[1]+y2*y2*Tinv[3]);
\r
3014 lnpR -= zz/(2*data.sigma2[locus]) + log(detT*square(data.sigma2[locus]))/2 + log(r1*r2);
\r
3021 double lnpriorRatioRates (int locus, int inodeChanged, double rold)
\r
3023 /* This calculates the lnpriorRatio when one rate is changed.
\r
3024 If inodeChanged is tip, we sum over 1 term (equation 7 in RY2007).
\r
3025 If inodeChanged is not tip, we sum over 2 terms.
\r
3027 double rnew=sptree.nodes[inodeChanged].rates[locus], lnpRd=0, a, b, z, znew;
\r
3028 double zz, r=-1, rA,r1,r2, y1,y2, t,tA,t1,t2,Tinv[4], detT;
\r
3029 int i, inode, ir, dad=-1, sons[2], OldNew;
\r
3031 if(com.clock==2 && data.priorrate==0) { /* clock2, LN rate prior */
\r
3032 z = log(rold/data.rgene[locus]) + data.sigma2[locus]/2;;
\r
3033 znew = log(rnew/data.rgene[locus]) + data.sigma2[locus]/2;
\r
3034 lnpRd = -log(rnew/rold) - (znew*znew - z*z)/(2*data.sigma2[locus]);
\r
3036 else if (com.clock==2 && data.priorrate==1) { /* clock2, gamma rate prior */
\r
3037 a = data.rgene[locus]*data.rgene[locus]/data.sigma2[locus];
\r
3038 b = data.rgene[locus]/data.sigma2[locus];
\r
3039 lnpRd = -b*(rnew-rold) + (a-1)*log(rnew/rold);
\r
3041 else { /* clock3 */
\r
3042 for(ir=0; ir<(sptree.nodes[inodeChanged].nson==0 ? 1 : 2); ir++) {
\r
3043 inode = (ir==0 ? sptree.nodes[inodeChanged].father : inodeChanged);
\r
3044 dad = sptree.nodes[inode].father;
\r
3045 for(i=0; i<2; i++) sons[i] = sptree.nodes[inode].sons[i];
\r
3046 t = sptree.nodes[inode].age;
\r
3047 if(inode==sptree.root) tA = 0;
\r
3048 else tA = (sptree.nodes[dad].age - t)/2;
\r
3049 t1 = (t - sptree.nodes[sons[0]].age)/2;
\r
3050 t2 = (t - sptree.nodes[sons[1]].age)/2;
\r
3051 detT = t1*t2+tA*(t1 + t2);
\r
3052 Tinv[0] = (tA+t2)/detT;
\r
3053 Tinv[1] = Tinv[2] = -tA/detT;
\r
3054 Tinv[3] = (tA+t1)/detT;
\r
3055 for(OldNew=0; OldNew<2; OldNew++) { /* old rate & new rate */
\r
3056 sptree.nodes[inodeChanged].rates[locus] = (OldNew==0 ? rold : rnew);
\r
3057 rA = (inode==sptree.root ? data.rgene[locus] : sptree.nodes[inode].rates[locus]);
\r
3058 r1 = sptree.nodes[sons[0]].rates[locus];
\r
3059 r2 = sptree.nodes[sons[1]].rates[locus];
\r
3060 y1 = log(r1/rA)+(tA+t1)*data.sigma2[locus]/2;
\r
3061 y2 = log(r2/rA)+(tA+t2)*data.sigma2[locus]/2;
\r
3062 zz = (y1*y1*Tinv[0]+2*y1*y2*Tinv[1]+y2*y2*Tinv[3]);
\r
3063 zz = zz/(2*data.sigma2[locus]) + log(r1*r2);
\r
3064 lnpRd -= (OldNew==0 ? -1 : 1) * zz;
\r
3072 double UpdateRates (double *lnL, double finetune)
\r
3074 /* This updates rates under the rate-drift models (clock=2 or 3).
\r
3075 It cycles through nodes in the species tree at each locus.
\r
3077 Slight waste of computation: For every proposal to change rates, lnpriorRates()
\r
3078 is called to calculate the prior for all rates at all loci on the whole
\r
3079 tree. This wasting computation if rates are not correlated across loci.
\r
3081 int locus, inode, j, g=data.ngene;
\r
3082 double naccept=0, lnpDinew, lnacceptance, lnLd, r, rnew, lnpRd;
\r
3083 double yb[2]={-99,99}, y, ynew;
\r
3085 if(finetune<=0) error2("steplength = 0 in UpdateRates");
\r
3086 for(locus=0; locus<g; locus++) {
\r
3087 for(inode=0; inode<sptree.nnode; inode++) {
\r
3088 if(inode == sptree.root) continue;
\r
3089 r = sptree.nodes[inode].rates[locus];
\r
3091 ynew = y + finetune*rndSymmetrical();
\r
3092 ynew = reflect(ynew, yb[0], yb[1]);
\r
3093 sptree.nodes[inode].rates[locus] = rnew = exp(ynew);
\r
3094 lnacceptance = ynew - y; /* proposal ratio */
\r
3095 UseLocus(locus, 1, mcmc.usedata, 0); /* copyconP=1 */
\r
3097 if(mcmc.saveconP) {
\r
3098 FOR(j,sptree.nspecies*2-1) com.oldconP[j]=1;
\r
3099 LabelOldCondP(inode);
\r
3101 lnpRd = lnpriorRatioRates(locus, inode, r);
\r
3102 lnacceptance += lnpRd;
\r
3103 lnpDinew = lnpD_locus(locus);
\r
3104 lnLd = lnpDinew - data.lnpDi[locus];
\r
3105 lnacceptance += lnLd;
\r
3106 if(lnacceptance>0 || rndu()<exp(lnacceptance)) {
\r
3108 if(mcmc.usedata==1) AcceptRejectLocus(locus,1);
\r
3110 data.lnpR += lnpRd;
\r
3111 data.lnpDi[locus] = lnpDinew;
\r
3115 if(mcmc.usedata==1) AcceptRejectLocus(locus,0);
\r
3116 sptree.nodes[inode].rates[locus] = r;
\r
3120 return(naccept/(g*(sptree.nnode-1)));
\r
3124 double logPriorRatioGamma(double xnew, double xold, double a, double b)
\r
3126 /* This calculates the log of prior ratio when x is updated from xold to xnew.
\r
3127 x has distribution with parameters a and b.
\r
3130 return (a-1)*log(xnew/xold) - b*(xnew-xold);
\r
3133 double UpdateParameters (double *lnL, double finetune)
\r
3135 /* This updates parameters in the substitution model such as the ts/tv
\r
3136 rate ratio for each locus.
\r
3138 Should we update the birth-death process parameters here as well?
\r
3141 int locus, j, ip, np=!com.fix_kappa+!com.fix_alpha;
\r
3142 double naccept=0, lnLd,lnpDinew, lnacceptance, c=1;
\r
3143 double yb[2]={-99,99},y,ynew, pold, pnew, *gammaprior;
\r
3145 if(np==0) return(0);
\r
3146 if(debug==4) puts("\nUpdateParameters");
\r
3147 if(finetune<=0) error2("steplength = 0 in UpdateParameters");
\r
3148 if(mcmc.saveconP) FOR(j,sptree.nspecies*2-1) com.oldconP[j]=0;
\r
3149 for(locus=0; locus<data.ngene; locus++) {
\r
3150 for(ip=0; ip<np; ip++) {
\r
3151 if(ip==0 && !com.fix_kappa) { /* kappa */
\r
3152 pold = data.kappa[locus];
\r
3154 ynew = y + finetune*rndSymmetrical();
\r
3155 ynew = reflect(ynew, yb[0], yb[1]);
\r
3156 data.kappa[locus] = pnew = exp(ynew);
\r
3157 gammaprior = data.kappagamma;
\r
3159 else { /* alpha */
\r
3160 pold = data.alpha[locus];
\r
3162 ynew = y + finetune*rndSymmetrical();
\r
3163 ynew = reflect(ynew, yb[0], yb[1]);
\r
3164 data.alpha[locus] = pnew = exp(ynew);
\r
3165 gammaprior = data.alphagamma;
\r
3167 lnacceptance = ynew - y;
\r
3169 UseLocus(locus, 1, mcmc.usedata, 0); /* this copies parameter from data.[] to com. */
\r
3171 lnpDinew = lnpD_locus(locus);
\r
3172 lnLd = lnpDinew - data.lnpDi[locus];
\r
3173 lnacceptance += lnLd;
\r
3174 lnacceptance += logPriorRatioGamma(pnew,pold,gammaprior[0],gammaprior[1]);
\r
3177 printf("\nLocus %2d para%d %9.4f%9.4f %10.5f", locus+1,ip+1,pold,pnew,lnLd);
\r
3179 if(lnacceptance >= 0 || rndu() < exp(lnacceptance)) {
\r
3182 data.lnpDi[locus] = lnpDinew;
\r
3183 if(mcmc.usedata==1) AcceptRejectLocus(locus,1);
\r
3184 if(debug==4) printf(" Y\n");
\r
3187 if(ip==0 && !com.fix_kappa)
\r
3188 data.kappa[locus] = pold;
\r
3190 data.alpha[locus] = pold;
\r
3192 if(mcmc.usedata==1) AcceptRejectLocus(locus,0);
\r
3194 if(debug==4) printf(" N\n");
\r
3198 return(naccept/(data.ngene*np));
\r
3202 double UpdateParaRates (double *lnL, double finetune, double space[])
\r
3204 /* This updates mu (mean rate) and sigma2 (variance of lnrates) for each locus.
\r
3205 The gamma-Dirichlet prior is assumed for mean rates (mu) across loci, and for sigma2.
\r
3206 For the clock (clock1), this changes mu for loci and changes the likelihood but there
\r
3207 is no prior for rates.
\r
3208 For relaxed clocks (clock2 and clock3), this changes mu and sigma2 for loci; it changes
\r
3209 the rate prior but not the likelihood.
\r
3210 The gamma-dirichlet prior on mu_i and sigma2_i (dos reis et al. 2014: eq. 5) is used.
\r
3212 int g=data.ngene, locus, ip, j;
\r
3213 char *parastr[2]={"mu", "sigma2"};
\r
3214 double lnacceptance, lnpDinew=0, lnpRnew=0, lnLd=0, naccept=0;
\r
3215 double yb[2]={-99,99},y,ynew, pold, pnew, sumold, sumnew, *gD; /* gamma-Dirichlet */
\r
3217 if(finetune<=0) error2("steplength = 0 in UpdateParaRates");
\r
3218 if(debug==5) puts("\nUpdateParaRates (rgene & sigma2)");
\r
3219 if(mcmc.saveconP) FOR(j,sptree.nspecies*2-1) com.oldconP[j]=0;
\r
3220 for(locus=0; locus<data.ngene; locus++) {
\r
3221 for(ip=0; ip<(com.clock==1 ? 1 : 2); ip++) { /* mu (rgene) and sigma2 for each locus */
\r
3223 if(ip==0) { /* rgene (mu) */
\r
3224 gD = data.rgenegD;
\r
3225 for(j=0,sumold=0; j<g; j++) sumold += data.rgene[j];
\r
3226 pold = data.rgene[locus];
\r
3228 ynew = y + finetune*rndSymmetrical();
\r
3229 ynew = reflect(ynew, yb[0], yb[1]);
\r
3230 data.rgene[locus] = pnew = exp(ynew);
\r
3231 if(com.clock==1) {
\r
3232 UseLocus(locus, 1, mcmc.usedata, 0);
\r
3233 lnpDinew = lnpD_locus(locus);
\r
3234 lnLd = lnpDinew - data.lnpDi[locus]; /* likelihood ratio */
\r
3235 lnacceptance += lnLd;
\r
3238 else { /* sigma2 */
\r
3239 gD = data.sigma2gD;
\r
3240 for(j=0,sumold=0; j<g; j++) sumold += data.sigma2[j];
\r
3241 pold = data.sigma2[locus];
\r
3243 ynew = y + finetune*rndSymmetrical();
\r
3244 ynew = reflect(ynew, yb[0], yb[1]);
\r
3245 data.sigma2[locus] = pnew = exp(ynew);
\r
3247 sumnew = sumold + pnew - pold;
\r
3248 lnacceptance += ynew - y;
\r
3249 /* gamma-dirichlet prior on mu_i and sigma2_i, equation 5 in dos reis et al. (2014). */
\r
3250 lnacceptance += (gD[0]-gD[2]*g)*log(sumnew/sumold) - gD[1]/g*(sumnew-sumold) + (gD[2]-1)*(ynew-y);
\r
3251 if(debug==5) printf("%-7s locus %2d %9.5f -> %9.5f ", parastr[ip], locus, pold, pnew);
\r
3254 lnpRnew = lnpriorRates();
\r
3255 lnacceptance += lnpRnew-data.lnpR;
\r
3257 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
3259 if(com.clock==1) {
\r
3261 data.lnpDi[locus] = lnpDinew;
\r
3262 if(mcmc.usedata==1) AcceptRejectLocus(locus,1);
\r
3265 data.lnpR = lnpRnew;
\r
3269 data.rgene[locus] = pold;
\r
3270 if(com.clock==1 && mcmc.usedata==1)
\r
3271 AcceptRejectLocus(locus,0); /* reposition conP */
\r
3274 data.sigma2[locus] = pold;
\r
3278 return(naccept/(com.clock==1 ? g : 2*g));
\r
3282 double mixingTipDate (double *lnL, double finetune)
\r
3284 /* This increases or decreases all node ages in a 1-D move, applied to TipDated data.
\r
3285 1 September 2011, Aguas de Lindoia
\r
3287 int g=data.ngene, i, j, k, jj, locus, ndivide = data.ngene; /* for mu */
\r
3288 double naccept=0, c, lnc, lnacceptance=0, lnpTnew, lnpRnew=-9999, lnpDinew[NGENE], lnLd;
\r
3289 double *gD=data.rgenegD, sumold=0, sumnew;
\r
3290 double AgeLow[NS]={0}, x[NS]={1}, tz;
\r
3292 /* find AgeLow[] and x[] for the interior nodes */
\r
3296 for(j=0; j<sptree.nspecies; j++) {
\r
3297 tz = sptree.nodes[j].age;
\r
3298 for(k=sptree.nodes[j].father; k!=-1; k=sptree.nodes[k].father)
\r
3299 if(tz < AgeLow[k-sptree.nspecies])
\r
3302 AgeLow[k-sptree.nspecies] = tz;
\r
3305 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
3306 printf("node %2d age %9.5f agelow %9.5f\n", j+1, sptree.nodes[j].age, AgeLow[j-sptree.nspecies]);
\r
3308 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
3309 if(j==sptree.root) continue;
\r
3310 jj = j-sptree.nspecies;
\r
3311 x[jj] = (sptree.nodes[j].age - AgeLow[jj])/(sptree.nodes[sptree.nodes[j].father].age - AgeLow[jj]);
\r
3313 lnacceptance = lnc = finetune*rndSymmetrical();
\r
3314 c = exp(lnacceptance);
\r
3315 sptree.nodes[sptree.root].age = AgeLow[0] + (sptree.nodes[sptree.root].age - AgeLow[0]) * c;
\r
3316 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
3317 if(j==sptree.root) continue;
\r
3318 jj = j-sptree.nspecies;
\r
3319 tz = sptree.nodes[j].age;
\r
3320 sptree.nodes[j].age = AgeLow[jj] + x[jj] * (sptree.nodes[sptree.nodes[j].father].age - AgeLow[jj]);
\r
3321 lnacceptance += log( (sptree.nodes[j].age - AgeLow[jj])/(tz - AgeLow[jj]) );
\r
3324 lnpTnew = lnpriorTimes();
\r
3325 lnacceptance += lnpTnew - data.lnpT;
\r
3327 /* dividing all rates by c. */
\r
3328 for(j=0; j<g; j++) { /* mu */
\r
3329 sumold += data.rgene[j];
\r
3330 data.rgene[j] /= c;
\r
3332 sumnew = sumold/c;
\r
3333 lnacceptance += (gD[0]-gD[2]*g)*log(sumnew/sumold) - gD[1]/g*(sumnew-sumold) + (gD[2]-1)*g*(-lnc);
\r
3335 if(com.clock>1) { /* rate-drift models */
\r
3336 ndivide += g*(sptree.nspecies*2 - 2);
\r
3337 for(i=0; i<sptree.nnode; i++)
\r
3338 if(i != sptree.root)
\r
3339 for(j=0; j<g; j++)
\r
3340 sptree.nodes[i].rates[j] /= c;
\r
3341 lnpRnew = lnpriorRates();
\r
3342 lnacceptance += lnpRnew - data.lnpR;
\r
3344 lnacceptance -= ndivide*lnc;
\r
3346 if(mcmc.saveconP)
\r
3347 for(j=0; j<sptree.nspecies*2-1; j++)
\r
3348 com.oldconP[j] = 0;
\r
3349 for(locus=0,lnLd=0; locus<g; locus++) {
\r
3350 UseLocus(locus, 1, mcmc.usedata, 0);
\r
3351 lnpDinew[locus] = lnpD_locus(locus);
\r
3352 lnLd += lnpDinew[locus] - data.lnpDi[locus];
\r
3354 lnacceptance += lnLd;
\r
3356 if(lnacceptance>0 || rndu()<exp(lnacceptance)) { /* accept */
\r
3358 data.lnpT = lnpTnew;
\r
3359 data.lnpR = lnpRnew;
\r
3360 for(locus=0; locus<g; locus++)
\r
3361 data.lnpDi[locus] = lnpDinew[locus];
\r
3362 if(mcmc.usedata==1) switchconPin();
\r
3365 else { /* reject */
\r
3366 /* restore all node ages */
\r
3367 sptree.nodes[sptree.root].age = AgeLow[0] + (sptree.nodes[sptree.root].age - AgeLow[0])/c;
\r
3368 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
3369 if(j==sptree.root) continue;
\r
3370 jj = j-sptree.nspecies;
\r
3371 sptree.nodes[j].age = AgeLow[jj] + x[jj] * (sptree.nodes[sptree.nodes[j].father].age - AgeLow[jj]);
\r
3375 /* restore all rates */
\r
3376 for(j=0; j<g; j++) data.rgene[j] *= c;
\r
3377 if(com.clock > 1) {
\r
3378 for(i=0; i<sptree.nnode; i++)
\r
3379 if(i != sptree.root)
\r
3380 for(j=0; j<g; j++)
\r
3381 sptree.nodes[i].rates[j] *= c;
\r
3384 for(locus=0; locus<g; locus++)
\r
3385 AcceptRejectLocus(locus, 0); /* reposition conP */
\r
3392 double mixing (double *lnL, double finetune)
\r
3394 /* If com.clock==1, this multiplies all times by c and divides all rates
\r
3395 (mu or data.rgene[]) by c, so that the likelihood does not change.
\r
3397 If com.clock=2 or 3, this multiplies all times by c and divide by c all rates:
\r
3398 sptree.nodes[].rates and mu (data.rgene[]) at all loci. The likelihood is
\r
3401 int g=data.ngene, i, j, ndivide = data.ngene; /* for mu */
\r
3402 double naccept=0, *gD=data.rgenegD, c, lnc, sumold=0, sumnew;
\r
3403 double lnacceptance=0, lnpTnew, lnpRnew=-1;
\r
3405 if(finetune<=0) error2("steplength = 0 in mixing");
\r
3407 return mixingTipDate(lnL, finetune);
\r
3409 lnc = finetune*rndSymmetrical();
\r
3411 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
3412 sptree.nodes[j].age *= c;
\r
3414 for(j=0; j<g; j++) { /* mu */
\r
3415 sumold += data.rgene[j];
\r
3416 data.rgene[j] /= c;
\r
3418 sumnew = sumold/c;
\r
3419 lnacceptance += (gD[0]-gD[2]*g)*log(sumnew/sumold) - gD[1]/g*(sumnew-sumold) + (gD[2]-1)*g*(-lnc);
\r
3421 if(com.clock>1) { /* rate-drift models */
\r
3422 ndivide += g*(sptree.nspecies*2-2);
\r
3423 for(i=0; i<sptree.nnode; i++)
\r
3424 if(i != sptree.root)
\r
3425 for(j=0; j<g; j++)
\r
3426 sptree.nodes[i].rates[j] /= c;
\r
3427 lnpRnew = lnpriorRates();
\r
3428 lnacceptance += lnpRnew-data.lnpR;
\r
3431 lnpTnew = lnpriorTimes();
\r
3432 lnacceptance += lnpTnew-data.lnpT + (sptree.nspecies-1 - ndivide)*lnc;
\r
3434 if(lnacceptance>0 || rndu()<exp(lnacceptance)) { /* accept */
\r
3436 data.lnpT = lnpTnew;
\r
3437 data.lnpR = lnpRnew;
\r
3439 else { /* reject */
\r
3440 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
3441 sptree.nodes[j].age /= c;
\r
3442 for(j=0; j<g; j++) data.rgene[j] *= c;
\r
3443 if(com.clock > 1) {
\r
3444 for(i=0; i<sptree.nnode; i++)
\r
3445 if(i != sptree.root)
\r
3446 for(j=0; j<g; j++)
\r
3447 sptree.nodes[i].rates[j] *= c;
\r
3454 double UpdatePFossilErrors (double finetune)
\r
3456 /* This updates the probability of fossil errors sptree.Pfossilerr.
\r
3457 The proposal do not change the likelihood.
\r
3459 double lnacceptance, lnpTnew, naccept=0, pold, pnew;
\r
3460 double p = data.pfossilerror[0], q = data.pfossilerror[1];
\r
3462 if(finetune<=0) error2("steplength = 0 in UpdatePFossilErrors");
\r
3463 pold = data.Pfossilerr;
\r
3464 pnew = pold + finetune*rndSymmetrical();
\r
3465 data.Pfossilerr = pnew = reflect(pnew,0,1);
\r
3466 lnacceptance = (p-1)*log(pnew/pold) + (q-1)*log((1-pnew)/(1-pold));
\r
3467 lnpTnew = lnpriorTimes();
\r
3468 lnacceptance += lnpTnew - data.lnpT;
\r
3470 if(lnacceptance>=0 || rndu()<exp(lnacceptance)) {
\r
3472 data.lnpT = lnpTnew;
\r
3475 data.Pfossilerr = pold;
\r
3481 int DescriptiveStatisticsSimpleMCMCTREE (FILE *fout, char infile[], int nbin)
\r
3483 FILE *fin=gfopen(infile,"r"), *fFigTree;
\r
3484 char *fmt=" %9.6f", *fmt1=" %9.1f", timestr[32], FigTreef[96]="FigTree.tre";
\r
3485 double *dat, *x, *mean, *median, *minx, *maxx, *x005,*x995,*x025,*x975,*xHPD025,*xHPD975,*var;
\r
3486 double *y, *Tint, tmp[2];
\r
3487 int n, p, i, j, jj;
\r
3488 size_t sizedata=0;
\r
3489 static int lline=10000000, ifields[MAXNFIELDS], SkipC1=1, HasHeader;
\r
3491 static char varstr[MAXNFIELDS][32]={""};
\r
3493 puts("\nSummarizing MCMC samples . ..");
\r
3494 if((line=(char*)malloc(lline*sizeof(char)))==NULL) error2("oom ds");
\r
3495 scanfile(fin, &n, &p, &HasHeader, line, ifields);
\r
3496 printf("%d records, %d variables\n", n, p);
\r
3498 sizedata = (size_t)p*n*sizeof(double);
\r
3499 if(fabs((double)p*n*sizeof(double) - sizedata) > 1)
\r
3500 error2("data matrix too big to fit in memory");
\r
3501 dat = (double*)malloc(sizedata);
\r
3502 mean = (double*)malloc((p*13+n)*sizeof(double));
\r
3503 if (dat==NULL||mean==NULL) error2("oom DescriptiveStatistics.");
\r
3504 memset(dat, 0, sizedata);
\r
3505 memset(mean, 0, (p*12+n)*sizeof(double));
\r
3506 median=mean+p; minx=median+p; maxx=minx+p;
\r
3507 x005=maxx+p; x995=x005+p; x025=x995+p; x975=x025+p; xHPD025=x975+p; xHPD975=xHPD025+p;
\r
3508 var=xHPD975+p; Tint=var+p; y=Tint+p;
\r
3510 if(HasHeader) { /* line has the first line of variable names */
\r
3511 for(i=0; i<p; i++) {
\r
3512 if(ifields[i]<0 || ifields[i]>lline-2) error2("line not long enough?");
\r
3513 sscanf(line+ifields[i], "%s", varstr[i]);
\r
3516 for(i=0; i<n; i++)
\r
3517 for(j=0; j<p; j++) {
\r
3518 fscanf(fin, "%lf", &dat[j*n+i]);
\r
3522 printf("Collecting mean, median, min, max, percentiles, etc.\n");
\r
3523 for(j=SkipC1,x=dat+j*n; j<p; j++,x+=n) {
\r
3524 memmove(y, x, n*sizeof(double));
\r
3525 Tint[j] = 1/Eff_IntegratedCorrelationTime(y, n, &mean[j], &var[j]);
\r
3526 qsort(x, (size_t)n, sizeof(double), comparedouble);
\r
3527 minx[j] = x[0]; maxx[j] = x[n-1];
\r
3528 median[j] = (n%2==0 ? (x[n/2]+x[n/2+1])/2 : x[(n+1)/2]);
\r
3529 x005[j] = x[(int)(n*.005)]; x995[j] = x[(int)(n*.995)];
\r
3530 x025[j] = x[(int)(n*.025)]; x975[j] = x[(int)(n*.975)];
\r
3532 HPDinterval(x, n, tmp, 0.05);
\r
3533 xHPD025[j] = tmp[0];
\r
3534 xHPD975[j] = tmp[1];
\r
3535 if((j+1)%100==0 || j==p-1)
\r
3536 printf("\r\t\t\t%6d/%6d done %s", j+1, p, printtime(timestr));
\r
3540 if(nodes[sptree.root].age==0) { /* the ages need be reset if mcmc.print<0 */
\r
3541 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
3542 nodes[j].age = mean[SkipC1+j-sptree.nspecies];
\r
3543 for(j=0; j<sptree.nnode; j++)
\r
3544 if(j!=sptree.root) {
\r
3545 nodes[j].branch = nodes[nodes[j].father].age - nodes[j].age;
\r
3546 if(nodes[j].branch<0)
\r
3547 puts("blength<0");
\r
3550 for(j=sptree.nspecies; j<sptree.nnode; j++) {
\r
3551 nodes[j].nodeStr = (char*)malloc(32*sizeof(char));
\r
3552 jj = SkipC1 + j - sptree.nspecies;
\r
3553 sprintf(nodes[j].nodeStr, "[&95%%={%.6g, %.6g}]", x025[jj], x975[jj]);
\r
3555 FPN(fout); OutTreeN(fout,1,PrBranch|PrLabel); FPN(fout); /* prints ages & CIs */
\r
3557 /* sprintf(FigTreef, "%s.FigTree.tre", com.seqf); */
\r
3558 if((fFigTree=(FILE*)fopen(FigTreef, "w")) == NULL)
\r
3559 error2("FigTree.tre file creation error");
\r
3560 fprintf(fFigTree, "#NEXUS\nBEGIN TREES;\n\n\tUTREE 1 = ");
\r
3561 OutTreeN(fFigTree, 1, PrBranch|PrLabel);
\r
3562 fprintf(fFigTree, "\n\nEND;\n");
\r
3565 printf("\nThe FigTree tree is in FigTree.tre");
\r
3566 fprintf(fFigTree, "\n[Note for FigTree: Under Time Scale, set Offset = %.1f, Scale factor = -%.1f\n",
\r
3567 com.TipDate, com.TipDate_TimeUnit);
\r
3568 fprintf(fFigTree, "Untick Scale Bar, & tick Tip Labels, Node Bars, Scale Axis, Reverse Axis, Show Grid.]\n");
\r
3573 if(com.clock>=2 && mcmc.print>=2) {
\r
3574 jj = SkipC1 + sptree.nspecies - 1 + data.ngene * 2;
\r
3575 for(i=0; i<data.ngene; i++) {
\r
3576 fprintf(fout, "\nrategram locus %d:\n", i+1);
\r
3577 for(j=0; j<sptree.nnode; j++) {
\r
3578 if(j==sptree.root) continue;
\r
3579 nodes[j].branch = mean[jj++];
\r
3581 OutTreeN(fout,1,PrBranch); FPN(fout);
\r
3585 printf("\n\nPosterior mean (95%% Equal-tail CI) (95%% HPD CI) HPD-CI-width\n\n");
\r
3586 for (j=SkipC1; j<p; j++) {
\r
3587 printf("%-9s ", varstr[j]);
\r
3588 printf("%10.4f (%6.4f, %6.4f) (%6.4f, %6.4f) %6.4f", mean[j], x025[j], x975[j], xHPD025[j], xHPD975[j], xHPD975[j]-xHPD025[j]);
\r
3589 if(j<SkipC1 + sptree.nspecies-1) {
\r
3590 printf(" (Jnode %2d)", 2*sptree.nspecies-1-1-j+SkipC1);
\r
3592 printf(" time: %6.2f (%5.2f, %5.2f)", com.TipDate-mean[j]*com.TipDate_TimeUnit,
\r
3593 com.TipDate-x975[j]*com.TipDate_TimeUnit, com.TipDate-x025[j]*com.TipDate_TimeUnit);
\r
3598 /* int correl (double x[], double y[], int n, double *mx, double *my, double *vxx, double *vxy, double *vyy, double *r) */
\r
3601 fprintf(fout, "\n\nPosterior mean (95%% Equal-tail CI) (95%% HPD CI) HPD-CI-width\n\n");
\r
3602 for(j=SkipC1; j<p; j++) {
\r
3603 fprintf(fout, "%-9s ", varstr[j]);
\r
3604 fprintf(fout, "%10.4f (%6.4f, %6.4f) (%6.4f, %6.4f) %6.4f", mean[j], x025[j], x975[j], xHPD025[j], xHPD975[j], xHPD975[j]-xHPD025[j]);
\r
3605 if(j<SkipC1+sptree.nspecies-1) {
\r
3606 fprintf(fout, " (Jnode %2d)", 2*sptree.nspecies-1-1-j+SkipC1);
\r
3608 fprintf(fout, " time: %6.2f (%5.2f, %5.2f)", com.TipDate-mean[j]*com.TipDate_TimeUnit,
\r
3609 com.TipDate-x975[j]*com.TipDate_TimeUnit, com.TipDate-x025[j]*com.TipDate_TimeUnit);
\r
3611 fprintf(fout, "\n");
\r
3614 printf("\nmean"); for(j=SkipC1; j<p; j++) printf("\t%.4f", mean[j]);
\r
3615 printf("\nEff "); for(j=SkipC1; j<p; j++) printf("\t%.4f", 1/Tint[j]);
\r
3619 free(dat); free(mean); free(line);
\r
3620 for(j=sptree.nspecies; j<sptree.nnode; j++)
\r
3621 free(nodes[j].nodeStr);
\r
3626 int MCMC (FILE* fout)
\r
3628 FILE *fmcmc = NULL;
\r
3629 int nsteps=5+(data.pfossilerror[0]>0), nxpr[2]={5, 1};
\r
3630 int i, j, k, ir, g=data.ngene;
\r
3631 double Pjump[6]={0}, lnL=0, nround=0, *x, *mx, postEFossil[MaxNFossils]={0};
\r
3632 double au=data.rgenegD[0], bu=data.rgenegD[1], a=data.rgenegD[2];
\r
3635 mcmc.saveconP = 1;
\r
3636 if(mcmc.usedata!=1) mcmc.saveconP = 0;
\r
3637 for(j=0; j<sptree.nspecies*2-1; j++)
\r
3638 com.oldconP[j] = 0;
\r
3640 if(mcmc.usedata == 2)
\r
3641 ReadBlengthGH(com.inBVf);
\r
3643 printf("\n%d burnin, sampled every %d, %d samples\n",
\r
3644 mcmc.burnin, mcmc.sampfreq, mcmc.nsample);
\r
3645 if(mcmc.usedata) puts("Approximating posterior");
\r
3646 else puts("Approximating prior");
\r
3648 printf("(Settings: cleandata=%d print=%d saveconP=%d)\n",
\r
3649 com.cleandata, mcmc.print, mcmc.saveconP);
\r
3651 com.np = GetInitials();
\r
3656 x=(double*)malloc(com.np*2*sizeof(double));
\r
3657 if(x==NULL) error2("oom in mcmc");
\r
3661 fmcmc = gfopen(com.mcmcf,"w");
\r
3662 collectx(fmcmc, x);
\r
3663 if(!com.fix_kappa && !com.fix_alpha && data.ngene==2) { nxpr[0]=6; nxpr[1]=4; }
\r
3665 puts("\npriors: ");
\r
3666 if(mcmc.usedata==1) {
\r
3667 if(!com.fix_kappa) printf("\tG(%7.4f, %7.4f) for kappa\n", data.kappagamma[0], data.kappagamma[1]);
\r
3668 if(!com.fix_alpha) printf("\tG(%7.4f, %7.4f) for alpha\n", data.alphagamma[0], data.alphagamma[1]);
\r
3670 printf("\tmu_i ~ gammaDir(%.4f, %.4f, %.4f)", data.rgenegD[0], data.rgenegD[1], data.rgenegD[2]);
\r
3671 printf(", SD(mu_i) = %9.6f\n", sqrt(((g-1)/(g*a+1)*(au+1) + 1)*au/(bu*bu)));
\r
3673 printf("\tsigma2 ~ gammaDir(%.4f, %.4f, %.4f)\n", data.sigma2gD[0], data.sigma2gD[1], data.sigma2gD[2]);
\r
3675 /* calculates prior for times and likelihood for each locus */
\r
3676 if(data.pfossilerror[0])
\r
3677 SetupPriorTimesFossilErrors();
\r
3678 data.lnpT = lnpriorTimes();
\r
3680 for(j=0; j<data.ngene; j++) com.rgene[j]=-1; /* com.rgene[] is not used. */
\r
3682 data.lnpR = lnpriorRates();
\r
3683 printf("\nInitial parameters (np = %d):\n", com.np);
\r
3684 for(j=0;j<com.np;j++) printf(" %9.6f",x[j]); FPN(F0);
\r
3685 lnL = lnpData(data.lnpDi);
\r
3686 printf("\nlnL0 = %9.2f\n", lnL);
\r
3688 printf("\nStarting MCMC (np = %d) . . .\n", com.np);
\r
3690 printf("paras: %d times, %d mu, (& kappa, alpha)\n", sptree.nspecies-1, data.ngene);
\r
3692 printf("paras: %d times, %d mu, %d sigma2 (& rates, kappa, alpha)\n", sptree.nspecies-1, data.ngene, data.ngene);
\r
3693 printf("finetune steps (t mu&sigma2 r mixing paras ...):");
\r
3694 for(j=0; j<nsteps; j++) printf(" %7.4f", mcmc.finetune[j]);
\r
3698 if(com.np<nxpr[0]+nxpr[1]) { nxpr[0]=com.np; nxpr[1]=0; }
\r
3699 for(ir=-mcmc.burnin; ir<mcmc.sampfreq*mcmc.nsample; ir++) {
\r
3700 if(ir==0 || (mcmc.resetFinetune && nround>=100 && mcmc.burnin>=200
\r
3701 && ir<0 && ir%(mcmc.burnin/4)==0)) {
\r
3702 /* reset finetune parameters. Do this twice. */
\r
3703 if(mcmc.resetFinetune && mcmc.burnin>=200) {
\r
3704 ResetFinetuneSteps(fout, Pjump, mcmc.finetune, nsteps);
\r
3707 zero(Pjump, nsteps);
\r
3708 zero(mx, com.np);
\r
3710 if(fabs(lnL-lnpData(data.lnpDi)) > 0.001) {
\r
3711 printf("\n%12.6f = %12.6f? Resetting lnL\n", lnL, lnpData(data.lnpDi));
\r
3712 lnL = lnpData(data.lnpDi);
\r
3718 Pjump[0] = (Pjump[0]*(nround-1) + UpdateTimes(&lnL, mcmc.finetune[0]))/nround;
\r
3719 Pjump[1] = (Pjump[1]*(nround-1) + UpdateParaRates(&lnL, mcmc.finetune[1],com.space))/nround;
\r
3721 Pjump[2] = (Pjump[2]*(nround-1) + UpdateRates(&lnL, mcmc.finetune[2]))/nround;
\r
3722 Pjump[3] = (Pjump[3]*(nround-1) + mixing(&lnL, mcmc.finetune[3]))/nround;
\r
3723 if(mcmc.usedata==1)
\r
3724 Pjump[4] = (Pjump[4]*(nround-1) + UpdateParameters(&lnL, mcmc.finetune[4]))/nround;
\r
3725 if(data.pfossilerror[0])
\r
3726 Pjump[5] = (Pjump[5]*(nround-1) + UpdatePFossilErrors(mcmc.finetune[5]))/nround;
\r
3728 /* printf("root age = %.4f\n", sptree.nodes[sptree.root].age); */
\r
3730 if(mcmc.print) collectx(fmcmc, x);
\r
3732 for(j=0; j<com.np; j++) mx[j] = (mx[j]*(nround-1) + x[j])/nround;
\r
3734 if(data.pfossilerror[0])
\r
3735 getPfossilerr(postEFossil, nround);
\r
3737 if(mcmc.print && ir>=0 && (ir==0 || (ir+1)%mcmc.sampfreq==0)) {
\r
3738 fprintf(fmcmc,"%d", ir+1);
\r
3739 for(j=0;j<com.np; j++) fprintf(fmcmc,"\t%.7f",x[j]);
\r
3740 if(mcmc.usedata) fprintf(fmcmc,"\t%.3f",lnL);
\r
3743 if((ir+1)%max2(mcmc.sampfreq, mcmc.sampfreq*mcmc.nsample/100)==0) {
\r
3744 printf("\r%3.0f%%", (ir+1.)/(mcmc.nsample*mcmc.sampfreq)*100.);
\r
3746 for(j=0; j<nsteps; j++)
\r
3747 printf(" %4.2f", Pjump[j]);
\r
3750 FOR(j,nxpr[0]) printf(" %5.3f", mx[j]);
\r
3751 if(com.np>nxpr[0]+nxpr[1] && nxpr[1]) printf(" -");
\r
3752 FOR(j,nxpr[1]) printf(" %5.3f", mx[com.np-nxpr[1]+j]);
\r
3753 if(mcmc.usedata) printf(" %4.1f", lnL);
\r
3756 if(mcmc.sampfreq*mcmc.nsample>20 && (ir+1)%(mcmc.sampfreq*mcmc.nsample/20)==0) {
\r
3758 if(fabs(lnL-lnpData(data.lnpDi)) > 0.001) {
\r
3759 printf("\n%12.6f = %12.6f? Resetting lnL\n", lnL, lnpData(data.lnpDi));
\r
3760 lnL = lnpData(data.lnpDi);
\r
3764 if(mcmc.print) fflush(fmcmc);
\r
3765 printf(" %s\n", printtime(timestr));
\r
3770 if(mcmc.print) fclose(fmcmc);
\r
3772 printf("\nTime used: %s", printtime(timestr));
\r
3773 fprintf(fout,"\nTime used: %s", printtime(timestr));
\r
3775 fprintf(fout, "\n\nmean of parameters using all iterations\n");
\r
3776 for(i=0; i<com.np; i++) fprintf(fout, " %9.5f", mx[i]);
\r
3780 for(j=0; j<sptree.nspecies-1; j++)
\r
3781 nodes[sptree.nspecies+j].age = mx[j];
\r
3782 for(j=0; j<sptree.nnode; j++)
\r
3783 if(j!=sptree.root)
\r
3784 nodes[j].branch = nodes[nodes[j].father].age - nodes[j].age;
\r
3785 fputs("\nSpecies tree for FigTree. Branch lengths = posterior mean times; 95% CIs = labels", fout);
\r
3786 FPN(fout); OutTreeN(fout,1,PrNodeNum); FPN(fout);
\r
3787 FPN(fout); OutTreeN(fout,1,1); FPN(fout);
\r
3790 DescriptiveStatisticsSimpleMCMCTREE(fout, com.mcmcf, 1);
\r
3791 if(data.pfossilerror[0]) {
\r
3792 free(data.CcomFossilErr);
\r
3793 printf("\nPosterior probabilities that each fossil is in error.\n");
\r
3794 for(i=k=0; i<sptree.nspecies*2-1; i++) {
\r
3795 if( (j=sptree.nodes[i].fossil) )
\r
3796 printf("Node %2d: %s (%9.4f, %9.4f) %8.4f\n", i+1, fossils[j],
\r
3797 sptree.nodes[i].pfossil[0], sptree.nodes[i].pfossil[1], postEFossil[k++]);
\r
3799 fprintf(fout, "\nPosterior probabilities that each fossil is in error.\n");
\r
3800 for(i=k=0; i<sptree.nspecies*2-1; i++) {
\r
3801 if( (j=sptree.nodes[i].fossil) )
\r
3802 fprintf(fout, "Node %2d: %s (%9.4f, %9.4f) %8.4f\n", i+1, fossils[j],
\r
3803 sptree.nodes[i].pfossil[0], sptree.nodes[i].pfossil[1], postEFossil[k++]);
\r
3809 printf("\ntime prior: %s\n", (data.priortime?"beta":"Birth-Death-Sampling"));
\r
3810 printf("rate prior: %s\n", (data.priorrate?"gamma":"Log-Normal"));
\r
3811 if(mcmc.usedata==2 && data.transform)
\r
3812 printf("%s transform is used in approx like calculation.\n", Btransform[data.transform]);
\r
3813 printf("\nTime used: %s\n", printtime(timestr));
\r