2 Maximum likelihood parameter estimation e aligned DNA (RNA) sequences,
\r
3 combined with phylogenetic tree estimation.
\r
4 Copyright, Ziheng YANG, July 1992 onwards
\r
6 cc -o baseml -fast baseml.c tools.c -lm
\r
7 cl -O2 baseml.c tools.c
\r
8 baseml <ControlFileName>
\r
14 #define NBRANCH (NS*2-2)
\r
15 #define NNODE (NS*2-1)
\r
16 #define MAXNSONS 200
\r
22 #define NP (NBRANCH+NGENE+11)
\r
24 #define NP (NBRANCH+3*NNODE+NGENE+11+NCATG*NCATG-1)
\r
26 extern int noisy, NFunCall, NEigenQ, NPMatUVRoot, *ancestor, GeneticCode[][64];
\r
27 extern double Small_Diff, *SeqDistance;
\r
29 int Forestry (FILE *fout);
\r
30 void DetailOutput(FILE *fout, double x[], double var[]);
\r
31 int GetOptions(char *ctlf);
\r
32 int GetInitials(double x[], int *fromfile);
\r
33 int GetInitialsTime(double x[]);
\r
34 int SetxInitials(int np, double x[], double xb[][2]);
\r
35 int SetParameters(double x[]);
\r
36 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[]);
\r
37 int SetPSiteClass(int iclass, double x[]);
\r
38 int testx(double x[], int np);
\r
39 double GetBranchRate(int igene, int ibrate, double x[], int *ix);
\r
40 int GetPMatBranch (double Pt[], double x[], double t, int inode);
\r
41 int ConditionalPNode (int inode, int igene, double x[]);
\r
43 int TransformxBack(double x[]);
\r
44 int AdHocRateSmoothing (FILE*fout, double x[NS*3], double xb[NS*3][2], double space[]);
\r
45 void DatingHeteroData(FILE* fout);
\r
47 int TestModel (FILE *fout, double x[], int nsep, double space[]);
\r
48 int OldDistributions (int inode, double AncientFreqs[]);
\r
49 int SubData(int argc, char *argv[]);
\r
50 int GroupDistances();
\r
54 unsigned char *z[NS];
\r
55 char *spname[NS], seqf[512],outf[512],treef[512], cleandata;
\r
56 char oldconP[NNODE]; /* update conP for nodes to save computation (0 yes; 1 no) */
\r
57 int seqtype, ns, ls, ngene, posG[NGENE+1], lgene[NGENE], *pose, npatt, readpattern;
\r
58 int np, ntime, nrgene, nrate, nalpha, npi, nhomo, ncatG, ncode, Mgene;
\r
59 size_t sspace, sconP;
\r
60 int fix_kappa, fix_rgene, fix_alpha, fix_rho, nparK, fix_blength;
\r
61 int clock, model, getSE, runmode, print, verbose, ndata, bootstrap;
\r
62 int icode, coding, method;
\r
64 double *fpatt, kappa, alpha, rho, rgene[NGENE], pi[4],piG[NGENE][4], TipDate, TipDate_TimeUnit;
\r
65 double freqK[NCATG], rK[NCATG], MK[NCATG*NCATG];
\r
66 double (*plfun)(double x[],int np), *conP, *fhK, *space;
\r
67 int conPSiteClass; /* is conP memory allocated for each class? */
\r
69 char *nodeScale; /* nScale[ns-1] for interior nodes */
\r
70 double *nodeScaleF; /* nScaleF[npatt] for scale factors */
\r
75 int nbranch, nnode, root, branches[NBRANCH][2];
\r
79 int father, nson, sons[MAXNSONS], ibranch, ipop;
\r
80 double branch, age, *pkappa, pi[4], *conP, label;
\r
81 char *nodeStr, fossil;
\r
82 } *nodes, **gnodes, nodes_t[2*NS-1];
\r
85 /* for sptree.nodes[].fossil: lower, upper, bounds, gamma, inverse-gamma */
\r
86 enum {LOWER_F=1, UPPER_F, BOUND_F} FOSSIL_FLAGS;
\r
87 char *fossils[]={" ", "L", "U", "B"};
\r
89 struct SPECIESTREE {
\r
90 int nbranch, nnode, root, nspecies, nfossil;
\r
92 char name[LSPNAME+1], fossil, usefossil; /* fossil: 0, 1, 2, 3 */
\r
93 int father, nson, sons[2];
\r
94 double age, pfossil[7]; /* lower and upper bounds or alpha & beta */
\r
95 double *lnrates; /* log rates for loci */
\r
98 /* all trees are binary & rooted, with ancestors unknown. */
\r
100 struct DATA { /* locus-specific data and tree information */
\r
101 int ns[NGENE], ls[NGENE], npatt[NGENE], ngene, lgene[NGENE];
\r
102 int root[NGENE+1], BlengthMethod, fix_nu, nbrate[NGENE];
\r
103 char *z[NGENE][NS], cleandata[NGENE];
\r
104 double *fpatt[NGENE], lnpT, lnpR, lnpDi[NGENE];
\r
105 double Qfactor[NGENE], pi[NGENE][NCODE], nu[NGENE];
\r
106 double rgene[NGENE], kappa[NGENE], alpha[NGENE], omega[1];
\r
107 int NnodeScale[NGENE];
\r
108 char *nodeScale[NGENE]; /* nScale[data.ns[locus]-1] for interior nodes */
\r
111 int nR=NCODE, LASTROUND, BayesEB;
\r
112 double PMat[NCODE*NCODE], Cijk[NCODE*NCODE*NCODE], Root[NCODE];
\r
113 int StepMatrix[NCODE*NCODE];
\r
116 FILE *frub, *flnf, *frst, *frst1, *frst2=NULL, *finitials=NULL;
\r
117 char *ratef="rates";
\r
118 char *models[]={"JC69","K80","F81","F84","HKY85","T92","TN93","REV","UNREST", "REVu","UNRESTu"};
\r
119 enum {JC69, K80, F81, F84, HKY85, T92, TN93, REV, UNREST, REVu, UNRESTu} MODELS;
\r
120 char *clockstr[]={"", "Global clock", "Local clock", "ClockCombined"};
\r
121 enum {GlobalClock=1, LocalClock, ClockCombined} ClockModels;
\r
123 double _rateSite=1; /* rate for a site */
\r
124 int N_PMatUVRoot=0;
\r
128 #include "treesub.c"
\r
129 #include "treespace.c"
\r
131 int main (int argc, char *argv[])
\r
133 FILE *fout, *fseq=NULL, *fpair[6];
\r
134 char pairfs[1][32]={"2base.t"};
\r
135 char rstf[512]="rst", ctlf[512]="baseml.ctl", timestr[64];
\r
136 char *Mgenestr[]={"diff. rate", "separate data", "diff. rate & pi",
\r
137 "diff. rate & kappa", "diff. rate & pi & kappa"};
\r
138 int getdistance=1, i, idata;
\r
145 com.cleandata = 0; noisy = 0; com.runmode = 0;
\r
148 com.fix_rgene = 0; /* 0: estimate rate factors for genes */
\r
150 com.getSE = 0; /* 1: want S.E.s of estimates; 0: don't want them */
\r
152 com.seqtype = 0; com.model = 0;
\r
153 com.fix_kappa = 0; com.kappa = 5;
\r
154 com.fix_alpha = 1; com.alpha = 0.; com.ncatG = 4; /* alpha=0 := inf */
\r
155 com.fix_rho = 1; com.rho = 0; com.nparK = 0;
\r
156 com.ncode = 4; com.icode = 0;
\r
157 com.print = 0; com.verbose = 1; com.fix_blength = 0;
\r
158 com.method = 0; com.space = NULL;
\r
160 printf("BASEML in %s\n", pamlVerStr);
\r
161 frub=gfopen("rub","w"); frst=gfopen(rstf,"w"); frst1=gfopen("rst1","w");
\r
163 if (argc>1) strncpy(ctlf, argv[1], 95);
\r
165 if(com.runmode != -2) finitials=fopen("in.baseml","r");
\r
166 if(com.getSE == 2) frst2 = fopen("rst2","w");
\r
168 fprintf(frst, "Supplemental results for BASEML\n\nseqf: %s\ntreef: %s\n",
\r
169 com.seqf, com.treef);
\r
170 fout = gfopen(com.outf, "w");
\r
171 fpair[0]=(FILE*)gfopen(pairfs[0],"w");
\r
173 /* for stepwise addition, com.sspace should be calculated using com.ns. */
\r
174 com.sspace = 1000000*sizeof(double);
\r
175 if((com.space = (double*)malloc(com.sspace)) == NULL)
\r
176 error2("oom space");
\r
178 if(com.clock==5 || com.clock==6)
\r
179 DatingHeteroData(fout);
\r
181 if((fseq=fopen (com.seqf,"r"))==NULL) {
\r
182 printf ("\n\nSequence file %s not found!\n", com.seqf);
\r
186 for (idata=0; idata<com.ndata; idata++) {
\r
188 printf("\nData set %4d\n", idata+1);
\r
189 fprintf(fout, "\n\nData set %4d\n", idata+1);
\r
191 fprintf(frst1, "%4d", idata+1);
\r
193 if(idata) GetOptions(ctlf); /* com.cleandata is read from here again. */
\r
194 ReadSeq ((com.verbose?fout:NULL), fseq, com.cleandata, 0);
\r
197 /* AllPatterns(fout); */
\r
199 if (com.rho && com.readpattern) error2("rho doesn't work with readpattern.");
\r
200 if(com.ndata==1) fclose(fseq);
\r
201 i = (com.ns*2-1)*sizeof(struct TREEN);
\r
202 if((nodes=(struct TREEN*)malloc(i))==NULL) error2("oom");
\r
205 if(com.ls%3!=0 || (com.ngene!=1 && com.ngene!=3))
\r
206 error2("this is not a coding sequence. Remove icode?");
\r
209 if(com.Mgene && com.ngene==1) error2 ("option Mgene for 1 gene?");
\r
210 if(com.ngene>1 && com.nhomo) error2("nhomo for mutliple genes?");
\r
211 if(com.nalpha && (!com.alpha || com.ngene==1 || com.fix_alpha))
\r
213 if(com.nalpha>1 && com.rho) error2("Malpha or rho");
\r
214 if(com.alpha==0) com.nalpha = 0;
\r
215 else com.nalpha = (com.nalpha?com.ngene:!com.fix_alpha);
\r
216 if(com.Mgene==1) com.nalpha = !com.fix_alpha;
\r
218 if(com.ngene==1) com.Mgene = 0;
\r
219 if((com.nhomo==1 && com.ngene>1) || (com.Mgene>1 && com.nhomo>=1))
\r
220 error2("nhomo does not work with Mgene options");
\r
222 if((com.Mgene>=2 && com.model==JC69) || (com.Mgene>=3 && com.model==F81)
\r
223 || ((com.Mgene==2 || com.Mgene==4) && com.model==K80)
\r
224 || (com.Mgene>1 && com.nhomo>1)
\r
225 || (com.Mgene>=2 && (com.model==UNREST||com.model==UNRESTu)))
\r
226 error2 ("model || Mgene");
\r
227 fprintf(fout,"\nBASEML (in %s) %s %s ", pamlVerStr, com.seqf, models[com.model]);
\r
228 if(com.clock) fprintf(fout," %s ",clockstr[com.clock]);
\r
229 if (!com.nparK && com.alpha && com.rho) fprintf (fout, " Auto-");
\r
230 if (com.alpha) fprintf (fout,"dGamma (ncatG=%d)", com.ncatG);
\r
231 if (com.nalpha>1) fprintf (fout,"(%d gamma)", com.nalpha);
\r
233 fprintf (fout, " (%d genes: %s) ", com.ngene, Mgenestr[com.Mgene]);
\r
235 fprintf(fout,"\nNonhomo:%2d fix_kappa%2d\n",com.nhomo, com.fix_kappa);
\r
236 if (com.nparK && com.ncatG>6)
\r
237 printf("\a\n%d rate categories for nonparametric model?\n", com.ncatG);
\r
238 if (com.nparK) fprintf(fout,"\nnparK:%4d K:%4d\n",com.nparK, com.ncatG);
\r
241 i = com.ns*(com.ns-1)/2;
\r
242 SeqDistance = (double*)realloc(SeqDistance,i*sizeof(double));
\r
243 ancestor = (int*)realloc(ancestor, i*sizeof(int));
\r
244 if(SeqDistance==NULL||ancestor==NULL) error2("oom distance&ancestor");
\r
246 InitializeBaseAA(fout);
\r
248 for(i=0; i<com.ngene; i++) xtoy(com.pi, com.piG[i], com.ncode);
\r
250 if (com.model==JC69 && !com.readpattern && !com.print) {
\r
251 PatternWeightJC69like(fout);
\r
252 if(com.verbose==2)
\r
253 fprintf(fout, "\n%6d %6d\n", com.ns, com.ls);
\r
254 for(i=0; i<com.ns; i++) {
\r
255 fprintf(fout, "\n%-30s ", com.spname[i]);
\r
256 print1seq (fout, com.z[i], com.ls, com.pose);
\r
258 fprintf(fout, "\n");
\r
261 com.sconP = (com.ns-1)*com.ncode*(size_t)com.npatt*sizeof(double);
\r
262 if((com.conP = (double*)realloc(com.conP, com.sconP))==NULL)
\r
263 error2("oom conP");
\r
264 if (com.alpha || com.nparK) {
\r
265 s2 = com.npatt*com.ncatG*sizeof(double);
\r
266 if((com.fhK=(double*)realloc(com.fhK,s2))==NULL) error2("oom");
\r
269 printf ("\n%9ld bytes for distance ",
\r
270 com.ns*(com.ns-1)/2*(sizeof(double)+sizeof(int)));
\r
271 printf("\n%9lu bytes for conP\n", com.sconP);
\r
272 printf("%9lu bytes for fhK\n%9lu bytes for space\n", s2, com.sspace);
\r
274 /* FOR(i,com.ns*2-1) xtoy(com.pi,nodes[i].pi, 4); check this? */
\r
277 DistanceMatNuc(fout,fpair[0],com.model,com.alpha);
\r
281 /* Selecting the most divergent sequences using the distance matrix
\r
285 FILE *ftmp=gfopen("newdata.txt", "w");
\r
286 int is, js, i,j, nskeep, isbest=0, isk, chosen[NS];
\r
287 double dmax, dminmax, d, dbig=9;
\r
289 if(com.model==0 && com.print==0)
\r
290 error2("choose RateAncestor = 1 to print the seqs correctly.");
\r
292 printf("\nPicking up the most different sequences.\nHow many do you want? ");
\r
293 scanf("%d", &nskeep);
\r
295 if(nskeep>=com.ns) error2("nothing to do");
\r
297 for(is=0; is<com.ns; is++) {
\r
302 for(is=0,dmax=0; is<com.ns; is++) {
\r
303 for(js=0; js<is; js++) {
\r
304 d = SeqDistance[is*(is-1)/2+js];
\r
305 if(dmax<d) { dmax = d; chosen[1]=is; chosen[0]=js; }
\r
308 keep[chosen[0]] = keep[chosen[1]] = 1;
\r
309 printf("selected seq %3d %s\n", chosen[0]+1, com.spname[chosen[0]]);
\r
310 printf("selected seq %3d %s\n", chosen[1]+1, com.spname[chosen[1]]);
\r
311 for (isk=2; isk<nskeep; isk++) {
\r
312 for(is=0,dminmax=0; is<com.ns; is++) {
\r
313 if(keep[is]) continue;
\r
314 /* d is the smallest distance to those chosen */
\r
315 for(js=0,d=dbig; chosen[js]!=-1; js++) {
\r
316 i = max2(is,chosen[js]);
\r
317 j = min2(is,chosen[js]);
\r
318 if(d>SeqDistance[i*(i-1)/2+j]) d = SeqDistance[i*(i-1)/2+j];
\r
321 dminmax = d; isbest = is;
\r
325 chosen[isk] = isbest;
\r
326 printf("selected seq %5d (dmin = %8.4f): %s\n", isbest+1, dminmax, com.spname[isbest]);
\r
329 fprintf(ftmp,"%6d %6d\n", nskeep, com.ls);
\r
330 for(j=0; j<com.ns; j++) {
\r
331 if(keep[j]==0) continue;
\r
332 fprintf(ftmp,"%-40s ", com.spname[j]);
\r
333 print1seq (ftmp, com.z[j], com.ls, com.pose);
\r
343 if (com.Mgene==1) MultipleGenes (fout, fpair, com.space);
\r
344 else if (com.runmode==0) Forestry (fout);
\r
345 else if (com.runmode==2) fprintf(frst,"%2d",StarDecomposition(fout,com.space));
\r
346 else if (com.runmode==3) StepwiseAddition (fout, com.space);
\r
347 else if (com.runmode>=4) Perturbation(fout, (com.runmode==4), com.space);
\r
349 FPN(frst); if((idata+1)%10==0) fflush(frst);
\r
350 if(com.ndata>1 && com.runmode) {
\r
351 fprintf(frst1, "\t");
\r
352 OutTreeN(frst1, 1, 0);
\r
354 FPN(frst1); fflush(frst1);
\r
356 printf("\nTime used: %s\n", printtime(timestr));
\r
359 if(com.ndata>1 && fseq) fclose(fseq);
\r
361 fclose(fout); fclose(frst); fclose(fpair[0]);
\r
362 if(finitials) { fclose(finitials); finitials=NULL; }
\r
368 /* x[]: t[ntime], rgene[ngene-1], kappa[nbranch], pi[nnode*3],
\r
369 { alpha, rho || rK[], fK[] || rK[], MK[] }
\r
373 void PrintBestTree(FILE *fout, FILE *ftree, int btree);
\r
375 int Forestry (FILE *fout)
\r
377 static int times=0;
\r
378 int status=0, inbasemlg=0, i,j=0, itree=0,ntree, np,np0,iteration=1;
\r
379 int pauptree=0, btree=0, haslength;
\r
380 double x[NP], xcom[NP-NBRANCH], lnL,lnL0=0,lnLbest=0, e=1e-7, nchange=-1;
\r
381 double xb[NP][2], tl=0, *g=NULL, *H=NULL;
\r
382 FILE *finbasemlg=NULL, *frate=NULL;
\r
384 ftree = gfopen(com.treef,"r");
\r
385 GetTreeFileType(ftree, &ntree, &pauptree, 0);
\r
387 frate = gfopen(ratef,"w");
\r
388 if (com.alpha && com.rho==0 && com.nhomo==0 && com.nparK==0 && com.ns<15) {
\r
389 inbasemlg=1; finbasemlg=gfopen("in.basemlg","w");
\r
391 flnf = gfopen("lnf","w+");
\r
392 fprintf(flnf,"%6d %6d %6d\n", ntree, com.ls, com.npatt);
\r
394 for(itree=0; ntree==-1||itree<ntree; itree++,iteration=1) {
\r
395 if(ReadTreeN(ftree,&haslength,&i,0,1))
\r
396 { puts("\nend of tree file."); break; }
\r
397 if(noisy) printf("\nTREE # %2d: ", itree+1);
\r
398 fprintf (fout,"\nTREE # %2d: ", itree+1);
\r
399 fprintf (frub,"\n\nTREE # %2d\n", itree+1);
\r
400 if(com.print) fprintf (frst,"\n\nTREE # %2d\n", itree+1);
\r
401 fprintf (flnf,"\n\n%2d\n", itree+1);
\r
404 if (com.fix_blength==2 && !haslength) error2("no branch lengths in tree");
\r
405 if (com.fix_blength>0 && !haslength) com.fix_blength=0;
\r
406 if (times++==0 && com.fix_blength>0 && haslength) {
\r
407 if(com.clock) puts("\nBranch lengths in tree are ignored");
\r
409 if(com.fix_blength==2) puts("\nBranch lengths in tree are fixed.");
\r
410 else if(com.fix_blength==1)
\r
411 puts("\nBranch lengths in tree used as initials.");
\r
412 if(com.fix_blength==1) {
\r
414 if((x[nodes[i].ibranch]=nodes[i].branch)<0)
\r
415 x[nodes[i].ibranch]=1e-5;
\r
421 nchange=MPScore (com.space);
\r
422 if(noisy&&com.ns<99)
\r
423 { OutTreeN(F0,0,0); printf(" MP score: %.2f\n",nchange);}
\r
424 OutTreeN(fout,0,0); fprintf(fout," MP score: %.2f",nchange);
\r
425 if(!com.clock && com.model<=REV && com.nhomo<=2
\r
426 && nodes[tree.root].nson<=2 && com.ns>2){
\r
427 puts("\nThis is a rooted tree, without clock. Check.");
\r
428 if(com.verbose) fputs("\nThis is a rooted tree. Please check!",fout);
\r
431 fflush(fout); fflush(flnf);
\r
433 GetInitials(x, &i);
\r
435 if(i==-1) iteration=0;
\r
437 if((np=com.np)>NP) error2("raise NP and recompile");
\r
439 if(com.sspace < spaceming2(np)) {
\r
440 com.sspace = spaceming2(np);
\r
441 if((com.space=(double*)realloc(com.space,com.sspace))==NULL)
\r
442 error2("oom space");
\r
445 if(itree) { np0 = np; }
\r
446 if(itree && !finitials && (com.nhomo==0 || com.nhomo==2))
\r
447 for (i=0; i<np-com.ntime; i++) x[com.ntime+i]=max2(xcom[i],0.001);
\r
451 lnL = com.plfun(x, np);
\r
453 printf("\nntime & nrate & np:%6d%6d%6d\n",com.ntime,com.nrate,com.np);
\r
454 if(noisy>2 && com.ns<50) { OutTreeB(F0); FPN(F0); matout(F0,x,1,np); }
\r
455 printf("\nlnL0 = %12.6f\n",-lnL);
\r
458 if (iteration && np) {
\r
459 SetxBound (np, xb);
\r
460 SetxInitials (np, x, xb); /* start within the feasible region */
\r
462 j = minB(noisy>2?frub:NULL, &lnL,x,xb, e, com.space);
\r
463 else if(com.method==3)
\r
464 j = minB2(noisy>2?frub:NULL, &lnL,x,xb, e, com.space);
\r
466 j = ming2(noisy>2?frub:NULL,&lnL,com.plfun,NULL,x,xb, com.space,e,np);
\r
468 if (j==-1 || lnL<=0 || lnL>1e7) status = -1;
\r
472 if (itree==0) { lnL0=lnLbest=lnL; btree=0; }
\r
473 else if (lnL<lnLbest) {
\r
474 lnLbest=lnL; btree=itree;
\r
476 if (noisy) printf ("Out...\nlnL = %12.6f\n", -lnL);
\r
477 fprintf(fout,"\nlnL(ntime:%3d np:%3d): %13.6f %+12.6f\n",
\r
478 com.ntime, np, -lnL, -lnL+lnL0);
\r
479 if(com.fix_blength<2) { OutTreeB (fout); FPN (fout); }
\r
482 if((com.npi && com.model!=T92) || com.nparK>=2) TransformxBack(x);
\r
484 if(com.clock) { /* move times into x[] */
\r
485 for(i=0,j=!nodes[tree.root].fossil; i<tree.nnode; i++)
\r
486 if(i!=tree.root && nodes[i].nson && !nodes[i].fossil)
\r
487 x[j++]=nodes[i].age;
\r
489 for(i=0; i<np; i++)
\r
490 fprintf(fout," %8.6f", x[i]);
\r
491 FPN(fout); fflush(fout);
\r
492 if(inbasemlg) matout(finbasemlg, x, 1, np);
\r
495 for(i=0;i<np;i++) fprintf(frst1,"\t%.6f", x[i]);
\r
496 fprintf(frst1,"\t%.4f", -lnL);
\r
499 puts("Calculating SE's");
\r
500 if(com.sspace < np*(np+1)*sizeof(double)) {
\r
501 com.sspace = np*(np+1)*sizeof(double);
\r
502 if((com.space=(double*)realloc(com.space,com.sspace))==NULL)
\r
503 error2("oom space for SE");
\r
508 HessianSKT2004 (x, lnL, g, H);
\r
509 if(com.getSE>=2 && com.clock==0 && nodes[tree.root].nson==3) { /* g & H */
\r
510 fprintf(frst2,"\n %d\n\n", com.ns);
\r
511 OutTreeN(frst2, 1, 1); fprintf(frst2,"\n\n");
\r
512 for(i=0; i<com.ntime; i++)
\r
513 if(x[i]>0.0004 && fabs(g[i])<0.005) g[i] = 0;
\r
514 for(i=0; i<com.ntime; i++) fprintf(frst2," %9.6f", x[i]); fprintf(frst2, "\n\n");
\r
515 for(i=0; i<com.ntime; i++) fprintf(frst2," %9.6f", g[i]); fprintf(frst2, "\n\n");
\r
516 fprintf(frst2, "\nHessian\n\n");
\r
517 for(i=0; i<com.ntime; i++,FPN(frst2))
\r
518 for(j=0; j<com.ntime; j++)
\r
519 fprintf(frst2," %10.4g", H[i*np+j]);
\r
522 for(i=0; i<np*np; i++) H[i] *= -1;
\r
523 matinv(H, np, np, H+np*np);
\r
524 fprintf(fout,"SEs for parameters:\n");
\r
525 for(i=0; i<np; i++)
\r
526 fprintf(fout," %8.6f", (H[i*np+i]>0. ? sqrt(H[i*np+i]) : -1));
\r
530 /* if(com.clock) SetBranch(x); */
\r
531 /* GroupDistances(); */
\r
534 fprintf(fout,"\nWarning: branch rates are not yet applied in tree length and branch lengths");
\r
536 fprintf(fout,"\nNote: mutation rate is not applied to tree length. Tree has times, for TreeView & FigTree\n");
\r
537 for(i=0; i<tree.nnode; i++)
\r
538 nodes[i].branch *= com.TipDate_TimeUnit;
\r
541 if(!AbsoluteRate) {
\r
542 for(i=0,tl=0;i<tree.nnode;i++)
\r
543 if(i!=tree.root) tl += nodes[i].branch;
\r
544 fprintf(fout,"\ntree length = %9.5f%s\n", tl, (com.ngene>1?" (1st gene)":""));
\r
546 FPN(fout); OutTreeN(fout,1,0); FPN(fout);
\r
547 FPN(fout); OutTreeN(fout,1,1); FPN(fout);
\r
549 FPN(fout); OutTreeN(fout,1,PrNodeNum); FPN(fout);
\r
552 if(com.TipDate) { /* scale back the times in nodes[].branch */
\r
553 for(i=0; i<tree.nnode; i++) nodes[i].branch /= com.TipDate_TimeUnit;
\r
555 if(com.np-com.ntime || com.clock)
\r
556 DetailOutput(fout, x, H);
\r
558 printf ("convergence?\n"); fprintf (fout,"check convergence..\n");
\r
561 for (i=0; i<np-com.ntime; i++) xcom[i]=x[com.ntime+i];
\r
563 for (i=0; i<np-com.ntime; i++) xcom[i]=xcom[i]*.8+x[com.ntime+i]*0.2;
\r
565 TestModel(fout,x,1,com.space);
\r
566 fprintf(fout,"\n\n# of lfun calls:%10d\n", NFunCall);
\r
570 if(com.coding && com.Mgene!=1 && !AbsoluteRate) {
\r
571 fputs("\nTree with branch lengths for codon models:\n",fout);
\r
572 FOR(i,tree.nnode) nodes[i].branch *= (1+com.rgene[1]+com.rgene[2]);
\r
573 FPN(fout); OutTreeN(fout,1,1); FPN(fout);
\r
574 FOR(i,tree.nnode) nodes[i].branch /= (1+com.rgene[1]+com.rgene[2]);
\r
577 com.print-=9; com.plfun(x, np); com.print+=9;
\r
579 if(com.plfun != lfun)
\r
580 lfunRates(frate, x, np);
\r
582 /** think about more-general models. Check that this may not be working for the clock models clock>=2 **/
\r
583 if((com.nhomo<=2 && com.nparK==0 && com.model<=REV && com.rho==0)
\r
584 || (com.nhomo>2 && com.alpha==0 && com.Mgene==0))
\r
585 AncestralSeqs(frst, x);
\r
587 } /* for (itree) */
\r
590 fprintf(frst1, "\t%d\t", btree+1);
\r
591 PrintBestTree(frst1, ftree, btree);
\r
593 if(finbasemlg) fclose(finbasemlg); if (frate) fclose(frate);
\r
596 if(ntree==-1) ntree=itree;
\r
597 if(ntree>1) { rewind(flnf); rell(flnf,fout,ntree); }
\r
604 void PrintBestTree(FILE *fout, FILE *ftree, int btree)
\r
606 int itree, ntree, i,j;
\r
609 GetTreeFileType(ftree,&ntree,&i,0);
\r
610 for(itree=0; ntree==-1||itree<ntree; itree++) {
\r
611 if(ReadTreeN(ftree,&i,&j,0,1))
\r
612 { puts("\nend of tree file."); break; }
\r
614 OutTreeN(fout, 1, 0);
\r
619 int TransformxBack(double x[])
\r
621 /* transform variables x[] back to their original definition after iteration,
\r
622 for output and for calculating SEs.
\r
624 int i,k, K=com.ncatG;
\r
626 k=com.ntime+com.nrgene+com.nrate;
\r
627 for (i=0; i<com.npi; i++)
\r
628 f_and_x(x+k+3*i,x+k+3*i,4,0,0);
\r
630 k+=com.npi*3 + K-1; /* K-1 for rK */
\r
631 if (com.nparK==2) /* rK & fK */
\r
632 f_and_x(x+k,x+k,K,0,0);
\r
633 else if (com.nparK==3) /* rK & MK (double stochastic matrix) */
\r
634 for (i=0; i<K-1; k+=K-1,i++) f_and_x(x+k,x+k,K,0,0);
\r
635 else if (com.nparK==4) /* rK & MK */
\r
636 for (i=0; i<K; k+=K-1,i++) f_and_x(x+k,x+k,K,0,0);
\r
641 void DetailOutput (FILE *fout, double x[], double var[])
\r
643 int n=com.ncode, i,j,k=com.ntime, nkappa[]={0, 1, 0, 1, 1, 1, 2, 5, 11};
\r
644 int n31pi=(com.model==T92?1:3);
\r
645 double Qfactor,*p=com.pi, t=0, k1,k2, S,V, Y=p[0]+p[1],R=p[2]+p[3];
\r
647 double *Qrate=x+com.ntime+com.nrgene, *AncientFreqs=NULL;
\r
648 double EXij[NCODE*NCODE]={0}, *Q=PMat, p0[NCODE], c[NCODE], ya;
\r
650 fprintf(fout,"\nDetailed output identifying parameters\n");
\r
651 if(com.clock) OutputTimesRates(fout, x, var);
\r
654 if (com.nrgene) { /* this used to be: if (com.nrgene && !com.clock) */
\r
655 fprintf (fout, "\nrates for %d genes:%6.0f", com.ngene, 1.);
\r
656 for(i=0; i<com.nrgene; i++)
\r
657 fprintf (fout, " %8.5f", x[k++]);
\r
662 if(com.nrate) fprintf (fout, "rate (kappa or abcde) under %s:", models[com.model]);
\r
663 for(i=0; i<com.nrate; i++) fprintf (fout, " %8.5f", x[k++]); FPN(fout);
\r
664 fprintf (fout, "Base frequencies:\n");
\r
665 for(j=0; j<n; j++) fprintf (fout, " %8.5f", com.pi[j]);
\r
668 else if(com.nhomo>=3) {
\r
669 if(!com.fix_kappa && (com.model==F84 || com.model==HKY85))
\r
670 fprintf (fout, "kappa or abcde under %s (in order of branches):", models[com.model]);
\r
672 fprintf (fout, "kappa or abcde under %s:", models[com.model]);
\r
673 for(i=0; i<com.nrate; i++) fprintf(fout," %8.5f", x[k++]); FPN(fout);
\r
676 if((AncientFreqs=(double*)malloc(tree.nnode*4*sizeof(double)))==NULL)
\r
677 error2("out of memory for OldDistributions()");
\r
678 OldDistributions (tree.root, AncientFreqs);
\r
680 fputs("\n(frequency parameters for branches) [frequencies at nodes] (see Yang & Roberts 1995 fig 1)\n\n",fout);
\r
681 for(i=0; i<tree.nnode; i++,FPN(fout)) {
\r
682 fprintf (fout, "Node #%2d (", i+1);
\r
683 for(j=0; j<4; j++) fprintf (fout, " %7.5f ", nodes[i].pi[j]);
\r
686 fprintf(fout," [");
\r
687 for(j=0; j<4; j++) fprintf(fout," %7.5f", AncientFreqs[i*4+j]);
\r
688 fprintf(fout," GC = %5.3f ]", AncientFreqs[i*4+1]+AncientFreqs[i*4+3]);
\r
691 fprintf(fout,"\nNote: node %d is root.\n", tree.root+1);
\r
692 k += com.npi*n31pi;
\r
694 /* print expected numbers of changes along branches */
\r
695 if(com.print && com.alpha==0 && com.verbose) {
\r
696 fprintf(fout, "\n\nExpected numbers of nucleotide changes on branches\n\n");
\r
697 for(inode=0; inode<tree.nnode; inode++,FPN(fout)) {
\r
698 if(inode==tree.root) continue;
\r
699 t = nodes[inode].branch;
\r
700 xtoy(AncientFreqs+nodes[inode].father*n, p0, n);
\r
702 if (com.nhomo>2 && com.model<=TN93) {
\r
703 eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa+1), nodes[inode].pi, &nR, Root, Cijk);
\r
704 QTN93(com.model, Q, *nodes[inode].pkappa, *(nodes[inode].pkappa+1), nodes[inode].pi);
\r
706 else if (com.nhomo>2 && com.model==REV)
\r
707 eigenQREVbase(NULL, Q, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk);
\r
709 for(i=0; i<n; i++) /* calculate correction vector c[4] */
\r
710 { c[i] = 0; Q[i*n+i] = 0; }
\r
712 for(i=0; i<n; i++) { /* calculate correction vector c[4] */
\r
713 for(a=1; a<n; a++) {
\r
714 ya = -(1 - exp(Root[a]*t))/Root[a];
\r
716 c[i] += p0[k]*Cijk[k*n*n+i*n+a]*ya;
\r
719 EXij[i*n+j] = Q[i*n+j] * (nodes[inode].pi[i]*t + c[i]);
\r
721 fprintf(fout, "Node #%2d ", inode+1);
\r
722 if(inode<com.ns) fprintf(fout, "%-10s ", com.spname[inode]);
\r
723 fprintf(fout, "(blength = %9.6f, %9.6f)", t, sum(EXij,n*n));
\r
724 fprintf(fout, " %s: ", (com.model==HKY85 ? "kappa" : "abcde"));
\r
725 for(j=0; j<(com.model==HKY85?1:5); j++) fprintf(fout, " %9.6f", *(nodes[inode].pkappa+j));
\r
726 fprintf(fout, "\npi source: "); for(j=0; j<n; j++) fprintf(fout, " %7.5f ", p0[j]);
\r
727 fprintf(fout, "\npi model : "); for(j=0; j<n; j++) fprintf(fout, " %7.5f ", nodes[inode].pi[j]);
\r
728 fprintf(fout, "\nExij\n");
\r
729 for(i=0; i<n; i++, FPN(fout)) {
\r
730 for(j=0; j<n; j++) fprintf(fout, " %9.6f", EXij[i*n+j]);
\r
731 fprintf(fout, " ");
\r
732 for(j=0; j<n; j++) fprintf(fout, " %8.1f", EXij[i*n+j]*com.ls);
\r
734 /* matout(fout, c, 1, n); */
\r
738 if(com.alpha==0) free(AncientFreqs);
\r
740 else if (!com.fix_kappa) {
\r
741 fprintf(fout,"\nParameters %s in the rate matrix", (com.model<=TN93 ? "(kappa)" : ""));
\r
742 fprintf(fout," (%s) (Yang 1994 J Mol Evol 39:105-111):\n", models[com.model]);
\r
744 if (com.nhomo==2) {
\r
745 fprintf (fout, "\nbranch t kappa TS TV\n");
\r
746 for(i=0; i<tree.nbranch; i++) {
\r
747 if (com.model==F84) { k1 = 1+x[k+i]/R; k2 = 1+x[k+i]/R; }
\r
748 else k1 = k2=x[k+i];
\r
749 S = 2*p[0]*p[1]*k1+2*p[2]*p[3]*k2;
\r
752 /* t=(com.clock ? nodes[tree.branches[i][1]].branch : x[i]); */
\r
753 t = nodes[tree.branches[i][1]].branch;
\r
754 fprintf(fout,"%2d..%-2d %9.5f %8.5f %9.5f %8.5f\n",
\r
755 tree.branches[i][0]+1,tree.branches[i][1]+1, t,x[k+i],
\r
756 t*S/(S+V), t*V/(S+V));
\r
759 /* Mgene = 0:rates, 1:separate; 2:diff pi, 3:diff kapa, 4:all diff*/
\r
760 else if (com.Mgene>=2) {
\r
761 for(i=0; i<com.ngene; i++) {
\r
762 fprintf (fout, "\nGene #%d: ", i+1);
\r
763 p = (com.Mgene==3 ? com.pi : com.piG[i]);
\r
764 Qrate = (com.Mgene==2 ? x+k : x+k+i*nkappa[com.model]);
\r
765 if (com.model<=TN93)
\r
766 FOR (j,nkappa[com.model]) fprintf(fout," %8.5f", Qrate[j]);
\r
767 else if (com.model==REV || com.model==REVu)
\r
768 /* output Q matrix, no eigen calculation */
\r
769 eigenQREVbase(fout, Q, Qrate, p, &nR, Root, Cijk);
\r
771 if (com.Mgene>=3) k+=com.ngene*nkappa[com.model];
\r
772 else k+=nkappa[com.model];
\r
775 if (com.model<REV) FOR (i,com.nrate) fprintf (fout, " %8.5f", x[k++]);
\r
782 if ((com.model==REV || com.model==REVu) && com.nhomo<=2) /* output Q, no eigen calculation */
\r
783 eigenQREVbase(fout, Q, Qrate, com.pi, &nR, Root, Cijk);
\r
784 else if (com.model==UNREST || com.model==UNRESTu)
\r
785 QUNREST (fout, PMat, x+com.ntime+com.nrgene, com.pi);
\r
788 for(j=0; j<com.nalpha; j++) {
\r
789 if (!com.fix_alpha)
\r
790 fprintf(fout,"\nalpha (gamma, K=%d) = %8.5f", com.ncatG,(com.alpha=x[k++]));
\r
792 DiscreteGamma(com.freqK,com.rK,com.alpha,com.alpha,com.ncatG,DGammaUseMedian);
\r
793 fprintf(fout, "\nrate: "); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.rK[i]);
\r
794 fprintf(fout, "\nfreq: "); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.freqK[i]);
\r
797 if (!com.fix_rho) {
\r
798 fprintf (fout, "rho for the auto-discrete-gamma model: %9.5f",x[k]);
\r
801 if (com.nparK>=1 && com.nalpha<=1) {
\r
802 fprintf(fout, "\nrate:"); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.rK[i]);
\r
803 fprintf(fout, "\nfreq:"); FOR(i,com.ncatG) fprintf(fout," %8.5f",com.freqK[i]);
\r
806 if (com.rho || (com.nparK>=3 && com.nalpha<=1)) {
\r
807 fprintf (fout, "transition probabilities between rate categories:\n");
\r
808 for(i=0;i<com.ncatG;i++,FPN(fout)) FOR(j,com.ncatG)
\r
809 fprintf (fout, " %8.5f", com.MK[i*com.ncatG+j]);
\r
814 int GetStepMatrix(char*line)
\r
816 /* read user definitions of the REV and UNREST models
\r
818 -1 at diagonals, 0 for default rate, and positive values for rates
\r
820 char *p,*errstr="StepMatrix specification in the control file";
\r
821 int n=com.ncode, i, j, k, b1=-1,b2;
\r
823 p = strchr(line,'[');
\r
824 if(p==NULL) error2("model specification. Expecting '['.");
\r
825 sscanf(++p, "%d", &com.nrate);
\r
826 if(com.nrate<0 || (com.model==REVu && com.nrate>5) || (com.model==UNRESTu && com.nrate>11))
\r
828 for(i=0; i<n; i++) for(j=0; j<n; j++)
\r
829 StepMatrix[i*n+j] = (i==j ? -1 : 0);
\r
830 for(i=0; i<com.nrate; i++) {
\r
831 while(*p && *p!='(') p++;
\r
832 if(*p++ !='(') error2( "expecting (" );
\r
833 for(k=0; k<12; k++) {
\r
834 while (isspace(*p)) p++;
\r
836 b1 = CodeChara(*p++,0);
\r
837 b2 = CodeChara(*p++,0);
\r
838 if(b1<0 || b1>3 || b2<0 || b2>3) error2("bases out of range.");
\r
839 if(b1==b2 || StepMatrix[b1*n+b2]>0) {
\r
840 printf("pair %c%c already specified.\n", BASEs[b1], BASEs[b2]);
\r
842 if(com.model==REVu) StepMatrix[b1*n+b2] = StepMatrix[b2*n+b1] = i+1;
\r
843 else StepMatrix[b1*n+b2] = i+1;
\r
845 printf("rate %d: %d pairs\n", i+1,k);
\r
847 for(i=0; i<n; i++,FPN(F0)) for(j=0; j<n; j++)
\r
848 printf("%3d", StepMatrix[i*n+j]);
\r
853 int GetOptions (char *ctlf)
\r
855 int iopt, i, j, nopt=31, lline=4096;
\r
856 char line[4096], *pline, opt[32], *comment="*#";
\r
857 char *optstr[]={"seqfile","outfile","treefile","noisy", "cleandata",
\r
858 "verbose","runmode", "method", "clock", "TipDate", "fix_rgene","Mgene", "nhomo",
\r
859 "getSE","RateAncestor", "model","fix_kappa","kappa",
\r
860 "fix_alpha","alpha","Malpha","ncatG", "fix_rho","rho",
\r
861 "nparK", "ndata", "bootstrap", "Small_Diff","icode", "fix_blength", "seqtype"};
\r
864 int ng=-1, markgenes[NGENE];
\r
867 fctl = gfopen(ctlf,"r");
\r
868 if(noisy) printf ("Reading options from %s..\n", ctlf);
\r
870 if (fgets(line, lline, fctl) == NULL) break;
\r
871 for (i=0,t=0,pline=line; i<lline&&line[i]; i++)
\r
872 if (isalnum(line[i])) { t=1; break; }
\r
873 else if (strchr(comment,line[i])) break;
\r
874 if (t==0) continue;
\r
875 sscanf (line, "%s%*s%lf", opt, &t);
\r
876 if ((pline=strstr(line, "="))==NULL)
\r
877 error2("option file.");
\r
879 for (iopt=0; iopt<nopt; iopt++) {
\r
880 if (strncmp(opt, optstr[iopt], 8)==0) {
\r
882 printf ("\n%3d %15s | %-20s %6.2f", iopt+1,optstr[iopt],opt,t);
\r
884 case ( 0): sscanf(pline+1, "%s", com.seqf); break;
\r
885 case ( 1): sscanf(pline+1, "%s", com.outf); break;
\r
886 case ( 2): sscanf(pline+1, "%s", com.treef); break;
\r
887 case ( 3): noisy=(int)t; break;
\r
888 case ( 4): com.cleandata=(char)t; break;
\r
889 case ( 5): com.verbose=(int)t; break;
\r
890 case ( 6): com.runmode=(int)t; break;
\r
891 case ( 7): com.method=(int)t; break;
\r
892 case ( 8): com.clock=(int)t; break;
\r
894 sscanf(pline+1, "%lf%lf", &com.TipDate, &com.TipDate_TimeUnit);
\r
896 case (10): com.fix_rgene=(int)t; break;
\r
897 case (11): com.Mgene=(int)t; break;
\r
898 case (12): com.nhomo=(int)t; break;
\r
899 case (13): com.getSE=(int)t; break;
\r
900 case (14): com.print=(int)t; break;
\r
901 case (15): com.model=(int)t;
\r
902 if(com.model>UNREST) GetStepMatrix(line); break;
\r
903 case (16): com.fix_kappa=(int)t; break;
\r
906 if(com.fix_kappa && (com.clock==5 || com.clock==6)
\r
907 && com.model!=0 && com.model!=2) {
\r
908 ng = splitline (++pline, markgenes);
\r
909 for(j=0; j<min2(ng,com.ndata); j++)
\r
910 if(!sscanf(pline+markgenes[j], "%lf", &data.kappa[j])) break;
\r
912 matout(F0, data.kappa, 1, min2(ng,com.ndata));
\r
916 case (18): com.fix_alpha=(int)t; break;
\r
919 if(com.fix_alpha && t && (com.clock==5 || com.clock==6)) {
\r
920 ng = splitline (++pline, markgenes);
\r
921 for(j=0; j<min2(ng,com.ndata); j++)
\r
922 if(!sscanf(pline+markgenes[j], "%lf", &data.alpha[j])) break;
\r
924 matout(F0, data.alpha, 1, min2(ng,com.ndata));
\r
928 case (20): com.nalpha=(int)t; break;
\r
929 case (21): com.ncatG=(int)t; break;
\r
930 case (22): com.fix_rho=(int)t; break;
\r
931 case (23): com.rho=t; break;
\r
932 case (24): com.nparK=(int)t; break;
\r
933 case (25): com.ndata=(int)t; break;
\r
934 case (26): com.bootstrap=(int)t; break;
\r
935 case (27): Small_Diff=t; break;
\r
936 case (28): com.icode=(int)t; com.coding=1; break;
\r
937 case (29): com.fix_blength=(int)t; break;
\r
939 com.seqtype=(int)t;
\r
940 if(com.seqtype==2) com.ncode = 2;
\r
941 else if(com.seqtype==5) com.ncode = 5;
\r
948 { printf ("\noption %s in %s not recognised\n", opt,ctlf); exit(-1); }
\r
952 if((com.fix_kappa || (com.fix_alpha&&com.alpha)) && (com.clock==5 || com.clock==6))
\r
953 printf("Using parameters from the control file.");
\r
956 if(com.seqtype!=0 && com.seqtype!=4 && com.seqtype!=5) error2("seqtype?");
\r
957 if (com.fix_alpha==1 && com.alpha==0) {
\r
958 if (!com.fix_rho || com.rho) error2("fix rho to 0 if alpha=0.");
\r
960 if (com.nparK>=1) {
\r
961 com.fix_alpha = com.fix_rho=1;
\r
962 if(com.alpha==0) com.alpha = 0.5; /* used to generate rK as initial values */
\r
963 if(com.nparK<=2) com.rho = 0;
\r
964 else com.rho = 0.4;
\r
966 error2("nhomo & nparK");
\r
968 if(com.model!=F84 && com.kappa<=0) error2("init kappa..");
\r
969 if(!com.fix_alpha && com.alpha<=0) error2("init alpha..");
\r
970 if(!com.fix_rho && com.rho==0) { com.rho=0.001; puts("init rho reset"); }
\r
973 { if (com.ncatG<2 || com.ncatG>NCATG) error2 ("ncatG"); }
\r
974 else if (com.ncatG>1) com.ncatG=1;
\r
976 if(com.method &&(com.clock||com.rho))
\r
977 { com.method=0; puts("Iteration method reset: method = 0"); }
\r
978 if(com.method && (com.model==UNREST||com.model==UNRESTu))
\r
979 error2("I did not implemented method = 1 for UNREST. Use method = 0");
\r
981 if(com.model>UNREST && com.Mgene>2)
\r
982 error2("u models don't work with Mgene");
\r
986 else if (com.nhomo==2) {
\r
987 if (com.model!=K80 && com.model!=F84 && com.model!=HKY85) error2("nhomo");
\r
989 else if (com.nhomo>2 && !(com.model>=F84 && com.model<=REV)) error2("nhomo & model");
\r
990 else if (com.nhomo>2 && com.method) error2("nhomo & method.");
\r
992 if (com.nhomo==1 && !(com.model>=F81 && com.model<=REV) && com.model!=REVu)
\r
993 error2("nhomo=1 and model");
\r
996 if(com.nhomo>=2 && com.clock==2) error2("clock=2 & nhomo imcompatible");
\r
998 if(com.clock>=5 && com.model>=TN93) error2("model & clock imcompatible");
\r
1000 if (com.nhomo>1 && com.runmode>0) error2("nhomo incompatible with runmode");
\r
1001 if (com.runmode==-2) error2("runmode = -2 not implemented in baseml.");
\r
1002 if (com.clock && com.runmode>2) error2("runmode & clock?");
\r
1003 if (com.runmode) com.fix_blength=0;
\r
1004 if (com.runmode==3 && (com.npi || com.nparK))
\r
1005 error2("runmode incompatible with nparK or nhomo.");
\r
1007 if (com.model==JC69 || com.model==K80 || com.model==UNREST)
\r
1008 if (com.nhomo!=2) com.nhomo=0;
\r
1009 if (com.model==JC69 || com.model==F81) { com.fix_kappa=1; com.kappa=1; }
\r
1010 if ((com.model==TN93 || com.model==REV || com.model==REVu) && com.nhomo<=2)
\r
1012 if (com.seqtype==5 && com.model!=REVu)
\r
1013 error2("RNA-editing model requires REVu");
\r
1015 if (com.nparK==3) {
\r
1016 puts("\n\nnparK==3, double stochastic, may not work. Use nparK=4?\n");
\r
1019 com.fix_omega=1; com.omega=0;
\r
1020 if(com.ndata<=0) com.ndata=1;
\r
1021 if(com.bootstrap && com.ndata!=1) error2("ndata=1 for bootstrap.");
\r
1027 int GetInitials (double x[], int *fromfile)
\r
1029 /* This calculates com.np, com.ntime, com.npi, com.nrate etc., and gets
\r
1030 initial values for ML iteration. It also calculates some of the
\r
1031 statistics (such as eigen values and vectors) that won't change during
\r
1032 the likelihood iteration. However, think about whether this is worthwhile.
\r
1033 The readfile option defeats this effort right now as the x[] is read in
\r
1034 after such calculations. Some of the effort is duplicated in
\r
1035 SetParameters(). Needs more careful thinking.
\r
1037 int i,j,k, K=com.ncatG, n31pi=(com.model==T92?1:3);
\r
1038 int nkappa[] = {0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1}, nkappasets;
\r
1039 size_t sconP_new=(size_t)(tree.nnode-com.ns)*com.ncode*com.npatt*com.ncatG*sizeof(double);
\r
1042 NFunCall = NPMatUVRoot = NEigenQ = 0;
\r
1043 if(com.clock==ClockCombined && com.ngene<=1)
\r
1044 error2("Combined clock model requires mutliple genes.");
\r
1045 GetInitialsTimes (x);
\r
1047 com.plfun = lfunAdG;
\r
1048 if (com.alpha==0 && com.nparK==0)
\r
1050 else if ((com.alpha && com.rho==0) || com.nparK==1 || com.nparK==2)
\r
1051 com.plfun = lfundG;
\r
1053 if(com.clock && com.fix_blength==-1)
\r
1054 com.fix_blength = 0;
\r
1056 if(com.method && com.fix_blength!=2 && com.plfun==lfundG) {
\r
1057 com.conPSiteClass = 1;
\r
1058 if(com.sconP < sconP_new) {
\r
1059 com.sconP = sconP_new;
\r
1060 printf("\n%9lu bytes for conP, adjusted\n", com.sconP);
\r
1061 if((com.conP = (double*)realloc(com.conP, com.sconP))==NULL)
\r
1062 error2("oom conP");
\r
1065 InitializeNodeScale();
\r
1067 com.nrgene = (!com.fix_rgene)*(com.ngene-1);
\r
1068 for(j=0; j<com.nrgene; j++) x[com.ntime+j]=1;
\r
1069 if (com.fix_kappa && (com.Mgene==3 || com.Mgene==4)) error2("Mgene options");
\r
1071 if(com.model<=UNREST) {
\r
1073 if (!com.fix_kappa) {
\r
1074 com.nrate = nkappa[com.model];
\r
1076 com.nrate *= com.ngene;
\r
1079 switch (com.nhomo) {
\r
1080 case (0): com.npi = 0; break; /* given 1 pi */
\r
1081 case (1): com.npi = 1; break; /* solve 1 pi */
\r
1082 case (2): com.npi = 0; com.nrate=tree.nbranch; break; /* b kappa's */
\r
1084 com.npi = com.ns + (tree.root>=com.ns) + (tree.nnode>com.ns+1);
\r
1085 com.nrate = (!com.fix_kappa && (com.model==F84 || com.model==HKY85) ? tree.nbranch : nkappa[com.model]);
\r
1086 for(i=0; i<tree.nnode; i++)
\r
1087 nodes[i].label = (i<com.ns ? i : com.ns);
\r
1088 if(tree.root>=com.ns)
\r
1089 nodes[tree.root].label = com.ns+1;
\r
1090 break; /* ns+2 pi */
\r
1092 com.npi = tree.nnode; /* nnode pi */
\r
1093 com.nrate = nkappa[com.model]*(com.fix_kappa ? 1 : tree.nbranch);
\r
1096 /* com.nrate = (!com.fix_kappa && (com.model==F84 || com.model==HKY85) ? tree.nbranch : nkappa[com.model]); */
\r
1097 com.nrate = nkappa[com.model]*(com.fix_kappa ? 1 : tree.nbranch);
\r
1098 for(i=0,com.npi=0; i<tree.nnode; i++) {
\r
1099 j = (int)nodes[i].label;
\r
1102 if(i!=tree.root && (j<0 || j>tree.nnode-1))
\r
1103 puts("node label in tree.");
\r
1105 if(tree.root>=com.ns)
\r
1106 nodes[tree.root].label = com.npi++;
\r
1108 printf("%d sets of frequency parameters\n",com.npi);
\r
1109 break; /* user-specified pi */
\r
1112 if (com.model<=TN93 && com.Mgene<=1 && com.nhomo==0)
\r
1113 eigenTN93 (com.model,com.kappa,com.kappa,com.pi,&nR,Root,Cijk);
\r
1114 if (com.model==REV || com.model==UNREST)
\r
1115 for(j=0; j<(com.Mgene>=3?com.ngene:1); j++) {
\r
1116 k = com.ntime + com.nrgene + j*(com.model==REV?5:11);
\r
1117 for(i=0; i<com.nrate; i++)
\r
1118 x[k+i] = 0.4 + 0.2*rndu();
\r
1119 if(com.model==UNREST)
\r
1120 x[k] = x[k+3] = x[k+8] = 0.8+0.2*rndu();
\r
1121 if (com.model==REV) {
\r
1122 nkappasets = com.nrate/5;
\r
1123 for(j=0; j<nkappasets; j++)
\r
1124 x[k+j*5] = 1 + 0.1*(j+1); /* 0.8+0.2*rndu(); */
\r
1128 for(i=0; i<com.nrate; i++)
\r
1129 x[com.ntime+com.nrgene+i] = 0.02 + com.kappa*(.8+0.4*rndu());
\r
1131 for(i=0; i<com.npi*n31pi; i++) /* initials for transformed pi's */
\r
1132 x[com.ntime+com.nrgene+com.nrate+i] = rndu()*.2;
\r
1133 com.np = k = com.ntime + com.nrgene + com.nrate + com.npi*n31pi;
\r
1135 if (com.alpha || com.nparK) {
\r
1136 for (i=0; i<com.nalpha; i++)
\r
1137 x[k++] = com.alpha = 0.01+com.alpha*(.9+0.2*rndu());
\r
1139 if (!com.fix_rho) x[k++] = com.rho = 0.01+com.rho*(.8+0.4*rndu());
\r
1142 AutodGamma(com.MK, com.freqK, com.rK, &t, com.alpha,com.rho,K);
\r
1144 DiscreteGamma (com.freqK, com.rK, com.alpha, com.alpha, K, DGammaUseMedian);
\r
1147 xtoy(com.rK, x+k, K-1);
\r
1150 switch (com.nparK) {
\r
1151 case (2): /* rK & fK */
\r
1152 /* zero(x+k, K-1); */
\r
1153 for (i=0; i<K-1; i++) x[k+i] = 0.1*(0.5-rndu());
\r
1156 case (3): /* rK & MK (double stochastic) */
\r
1157 /* zero(x+k, (K-1)*(K-1)); */
\r
1158 for (i=0; i<(K-1)*(K-1); i++) x[k+i] = 0.1*(0.5-rndu());
\r
1161 case (4): /* rK & MK */
\r
1162 /* zero(x+k, K*(K-1)); */
\r
1163 for (i=0; i<K*(K-1); i++) x[k+i] = 0.1*(0.5-rndu());
\r
1170 if(com.fix_blength==-1)
\r
1171 for(i=0; i<com.np; i++) x[i] = (i<com.ntime ? .1+rndu() : .5+rndu());
\r
1173 if(finitials) readx(x, fromfile);
\r
1182 int SetParameters(double x[])
\r
1184 /* This sets parameters in com., nodes[], etc and is called before lfun() etc.
\r
1185 Iinitialize U, V, Root etc, if necessary.
\r
1186 For nhomo models (nhomo=1,3,4)
\r
1187 x[] has frequencies if (LASTROUND==1) or exp(pi)/(1+SUM(exp[pi])) if otherwise
\r
1189 int i, j, k, K=com.ncatG, status=0, n31pi=(com.model==T92?1:3);
\r
1190 int nkappa[] = {0, 1, 0, 1, 1, 1, 2, 5, 11, -1, -1};
\r
1191 double k1=com.kappa, k2=com.kappa, t, space[NCATG*(NCATG+1)];
\r
1193 if(com.clock>=5) return(0);
\r
1194 if(com.fix_blength<2) SetBranch(x);
\r
1196 for(i=0; i<com.nrgene; i++) com.rgene[i+1] = x[com.ntime+i];
\r
1197 if(com.clock && com.clock<5 && AbsoluteRate)
\r
1198 com.rgene[0] = x[0]; /* so that rgene are absolute rates */
\r
1200 if (!com.fix_kappa && com.model<=TN93 && com.clock<5) {
\r
1201 com.kappa = k1 = k2 = x[com.ntime+com.nrgene];
\r
1202 if (com.model==TN93) k2 = x[com.ntime+com.nrgene+1];
\r
1204 if (com.nhomo==1) {
\r
1205 k = com.ntime+com.nrgene+com.nrate;
\r
1206 if(com.model==T92)
\r
1207 { com.pi[0] = com.pi[2] = (1-x[k])/2; com.pi[1] = com.pi[3]=x[k]/2; }
\r
1209 if (!LASTROUND) f_and_x(x+k, com.pi, 4, 0, 0);
\r
1210 else xtoy (x+k, com.pi, 3);
\r
1211 com.pi[3] = 1-sum(com.pi,3);
\r
1213 if (com.model<=TN93)
\r
1214 eigenTN93(com.model, k1, k2, com.pi, &nR, Root, Cijk);
\r
1216 else if (com.nhomo==2)
\r
1217 for (i=0,k=com.ntime+com.nrgene; i<tree.nbranch; i++)
\r
1218 nodes[tree.branches[i][1]].pkappa = x+k+i;
\r
1220 if (com.model<=TN93 && com.nhomo==0 && com.Mgene<=1)
\r
1221 RootTN93 (com.model, k1, k2, com.pi, &t, Root);
\r
1222 else if (com.nhomo>=3) {
\r
1223 for (i=0,k=com.ntime+com.nrgene; i<tree.nbranch; i++) {
\r
1225 nodes[tree.branches[i][1]].pkappa = x + k;
\r
1227 nodes[tree.branches[i][1]].pkappa = x + k + i*nkappa[com.model];
\r
1231 for(i=0; i<tree.nnode; i++) {
\r
1232 j = (com.nhomo==4 ? i : (int)nodes[i].label);
\r
1233 if(com.model==T92) {
\r
1234 nodes[i].pi[0] = nodes[i].pi[2] = (1-x[k+j])/2;
\r
1235 nodes[i].pi[1] = nodes[i].pi[3] = x[k+j]/2;
\r
1238 if (!LASTROUND) f_and_x(x+k+j*3, nodes[i].pi, 4,0,0);
\r
1239 else xtoy (x+k+j*3, nodes[i].pi, 3);
\r
1240 nodes[i].pi[3] = 1-sum(nodes[i].pi,3);
\r
1243 xtoy(nodes[tree.root].pi, com.pi, 4);
\r
1245 else if ((com.model==REV || com.model==REVu) && com.Mgene<=1)
\r
1246 eigenQREVbase (NULL, PMat, x+com.ntime+com.nrgene, com.pi, &nR, Root, Cijk);
\r
1248 else if ((com.model==UNREST || com.model==UNRESTu) && com.Mgene<=1)
\r
1249 eigenQunrest (NULL, x+com.ntime+com.nrgene,com.pi,&nR,cRoot,cU,cV);
\r
1252 if (com.nparK==0 && (com.alpha==0 || com.fix_alpha*com.fix_rho==1))
\r
1254 if (com.nalpha>1) return (status);
\r
1255 k = com.ntime+com.nrate+com.nrgene+com.npi*n31pi;
\r
1256 if (!com.fix_alpha) {
\r
1259 DiscreteGamma (com.freqK,com.rK,com.alpha,com.alpha,K,DGammaUseMedian);
\r
1261 if (!com.fix_rho) {
\r
1263 AutodGamma (com.MK, com.freqK, com.rK, &t,com.alpha,com.rho,K);
\r
1265 if (com.nparK==0) return(status);
\r
1267 /* nparK models */
\r
1268 xtoy (x+k, com.rK, K-1);
\r
1270 if (com.nparK==2) {
\r
1271 if (!LASTROUND) f_and_x(x+k+K-1, com.freqK, K,0,0);
\r
1272 else xtoy (x+k+K-1, com.freqK, K-1);
\r
1273 com.freqK[K-1]=1-sum(com.freqK, K-1);
\r
1275 else if (com.nparK==3) { /* rK & MK (double stochastic matrix) */
\r
1276 for (i=0,k+=K-1; i<K-1; k+=K-1,i++) {
\r
1277 if (!LASTROUND) f_and_x(x+k, com.MK+i*K, K,0,0);
\r
1278 else xtoy (x+k, com.MK+i*K, K-1);
\r
1279 com.MK[i*K+K-1]=1-sum(com.MK+i*K,K-1);
\r
1282 for(i=0,com.MK[(K-1)*K+j]=1;i<K-1; i++)
\r
1283 com.MK[(K-1)*K+j]-=com.MK[i*K+j];
\r
1284 if (com.MK[(K-1)*K+j]<0)
\r
1285 printf("SetPar: MK[K-1][j]=%.5f<0\n",com.MK[(K-1)*K+j]);
\r
1288 else if (com.nparK==4) { /* rK & MK */
\r
1289 for (i=0, k+=K-1; i<K; k+=K-1, i++) {
\r
1290 if (!LASTROUND) f_and_x(x+k, com.MK+i*K, K,0,0);
\r
1291 else xtoy (x+k, com.MK+i*K, K-1);
\r
1292 com.MK[i*K+K-1]=1-sum(com.MK+i*K,K-1);
\r
1294 PtoPi(com.MK, com.freqK, K, space);
\r
1296 com.rK[K-1]=(1-innerp(com.freqK, com.rK, K-1))/com.freqK[K-1];
\r
1301 int SetPGene(int igene, int _pi, int _UVRoot, int _alpha, double x[])
\r
1303 /* xcom[] does not contain time parameters
\r
1304 Note that com.piG[][] have been homogeneized if (com.Mgene==3)
\r
1306 int nr[]={0, 1, 0, 1, 1, 1, 2, 5, 11};
\r
1307 int k=com.nrgene+(com.Mgene>=3)*igene*nr[com.model];
\r
1308 double *xcom=x+com.ntime;
\r
1309 double ka1=xcom[k], ka2=(com.model==TN93?xcom[k+1]:-1);
\r
1311 if(com.Mgene==2 && com.fix_kappa) ka1 = ka2 = com.kappa;
\r
1314 xtoy(com.piG[igene], com.pi, 4);
\r
1317 if (com.model==K80) com.kappa = ka1;
\r
1318 else if (com.model<=TN93)
\r
1319 eigenTN93(com.model, ka1, ka2, com.pi, &nR, Root, Cijk);
\r
1320 else if (com.model==REV || com.model==REVu)
\r
1321 eigenQREVbase(NULL, PMat, xcom+k, com.pi, &nR, Root, Cijk);
\r
1324 com.alpha = xcom[com.nrgene+com.nrate+com.npi+igene]; /* check?? */
\r
1325 DiscreteGamma(com.freqK, com.rK, com.alpha, com.alpha, com.ncatG, DGammaUseMedian);
\r
1331 int SetxBound (int np, double xb[][2])
\r
1333 /* sets lower and upper bounds for variables during iteration
\r
1335 int i, j, k=0, nf=0, n31pi=(com.model==T92?1:3);
\r
1336 double tb[]={.4e-6, 9999}, tb0=.4e-6, rgeneb[]={1e-4,999}, rateb[]={1e-5,999};
\r
1337 double alphab[]={.005, 999}, rhob[]={-0.2, 0.99}, pb[]={.00001,.99999};
\r
1338 double fb[]={-19,9}; /* transformed freqs.*/
\r
1340 SetxBoundTimes (xb);
\r
1341 for(i=com.ntime;i<np;i++) FOR (j,2) xb[i][j]=rateb[j];
\r
1343 FOR (i,com.nrgene) FOR (j,2) xb[com.ntime+i][j]=rgeneb[j];
\r
1344 FOR (i,com.nrate) FOR (j,2) xb[com.ntime+com.nrgene+i][j]=rateb[j];
\r
1345 k=com.ntime+com.nrgene+com.nrate;
\r
1346 for(i=0; i<com.npi*n31pi; i++) {
\r
1347 xb[k][0] =(com.model==T92?pb[0]:fb[0]);
\r
1348 xb[k++][1]=(com.model==T92?pb[1]:fb[1]);
\r
1350 for (i=0;i<com.nalpha;i++,k++) FOR (j,2) xb[k][j]=alphab[j];
\r
1351 if (!com.fix_rho) FOR (j,2) xb[np-1][j]=rhob[j];
\r
1353 FOR (i,com.ncatG-1) { xb[k][0]=rateb[0]; xb[k++][1]=rateb[1]; }
\r
1354 if (com.nparK==2) nf=com.ncatG-1;
\r
1355 else if(com.nparK==3) nf=(com.ncatG-1)*(com.ncatG-1);
\r
1356 else if(com.nparK==4) nf=(com.ncatG-1)*com.ncatG;
\r
1357 FOR(i,nf) { xb[k][0]=fb[0]; xb[k++][1]=fb[1]; }
\r
1359 if(noisy>2 && np<50) {
\r
1360 printf("\nBounds (np=%d):\n",np);
\r
1361 FOR(i,np) printf(" %10.6f", xb[i][0]); FPN(F0);
\r
1362 FOR(i,np) printf(" %10.6f", xb[i][1]); FPN(F0);
\r
1367 int testx (double x[], int np)
\r
1369 /* This is used for LS branch length estimation by nls2, called only if(clock==0)
\r
1372 double tb[]={.4e-6, 99};
\r
1374 FOR (i,com.ntime)
\r
1375 if (x[i]<tb[0] || x[i]>tb[1])
\r
1382 int ConditionalPNode (int inode, int igene, double x[])
\r
1384 int n=com.ncode, i,j,k,h, ison, pos0=com.posG[igene],pos1=com.posG[igene+1];
\r
1387 for (i=0; i<nodes[inode].nson; i++)
\r
1388 if (nodes[nodes[inode].sons[i]].nson>0 && !com.oldconP[nodes[inode].sons[i]])
\r
1389 ConditionalPNode (nodes[inode].sons[i], igene, x);
\r
1390 if(inode<com.ns) { /* young ancestor */
\r
1391 for(h=pos0*n; h<pos1*n; h++)
\r
1392 nodes[inode].conP[h] = 0;
\r
1395 for(h=pos0*n; h<pos1*n; h++)
\r
1396 nodes[inode].conP[h] = 1;
\r
1397 if (com.cleandata && inode<com.ns) /* young ancestor */
\r
1398 for(h=pos0; h<pos1; h++)
\r
1399 nodes[inode].conP[h*n+com.z[inode][h]] = 1;
\r
1401 for (i=0; i<nodes[inode].nson; i++) {
\r
1402 ison = nodes[inode].sons[i];
\r
1403 t = nodes[ison].branch*_rateSite;
\r
1406 if(com.clock) t *= GetBranchRate(igene,(int)nodes[ison].label,x,NULL);
\r
1407 else t *= com.rgene[igene];
\r
1409 GetPMatBranch(PMat, x, t, ison);
\r
1411 if (nodes[ison].nson<1 && com.cleandata) { /* tip && clean */
\r
1412 for(h=pos0; h<pos1; h++)
\r
1413 for(j=0; j<n; j++)
\r
1414 nodes[inode].conP[h*n+j] *= PMat[j*n+com.z[ison][h]];
\r
1416 else if (nodes[ison].nson<1 && !com.cleandata) { /* tip & unclean */
\r
1417 for(h=pos0; h<pos1; h++)
\r
1418 for(j=0; j<n; j++) {
\r
1419 for(k=0,t=0; k<nChara[com.z[ison][h]]; k++)
\r
1420 t += PMat[j*n+CharaMap[com.z[ison][h]][k]];
\r
1421 nodes[inode].conP[h*n+j] *= t;
\r
1425 for(h=pos0; h<pos1; h++)
\r
1426 for(j=0; j<n; j++) {
\r
1427 for(k=0,t=0; k<n; k++)
\r
1428 t += PMat[j*n+k]*nodes[ison].conP[h*n+k];
\r
1429 nodes[inode].conP[h*n+j] *= t;
\r
1433 } /* for (ison) */
\r
1434 if(com.NnodeScale && com.nodeScale[inode]) NodeScale(inode, pos0, pos1);
\r
1439 int PMatCijk (double P[], double t)
\r
1441 /* P(t)ij = SUM Cijk * exp{Root*t}
\r
1443 int i,j,k, n=com.ncode, nr=nR;
\r
1444 double expt[5], pij;
\r
1446 if (t<-.001 && noisy>3)
\r
1447 printf ("\nt = %.5f in PMatCijk", t);
\r
1448 if (t<1e-200) { identity (P, n); return(0); }
\r
1450 for (k=1; k<nr; k++) expt[k] = exp(t*Root[k]);
\r
1451 for(i=0; i<n; i++) for(j=0; j<n; j++) {
\r
1452 for (k=1,pij=Cijk[i*n*nr+j*nr+0]; k<nr; k++)
\r
1453 pij += Cijk[i*n*nr+j*nr+k]*expt[k];
\r
1454 P[i*n+j] = (pij>0?pij:0);
\r
1462 int CollapsSite (FILE *fout, int nsep, int ns, int *ncat, int SiteCat[])
\r
1464 int j,k, it, h, b[NS], ndiff, n1, bit1;
\r
1465 /* n1: # of 1's ... bit1: the 1st nonzero bit */
\r
1467 *ncat = 5 + (1<<(ns-1))-1 + nsep*11;
\r
1468 if (fout) fprintf (fout, "\n# cat:%5d # sep:%5d\n\n", *ncat, nsep);
\r
1470 FOR (h, 1<<2*ns) {
\r
1471 for (j=0,it=h; j<ns; b[ns-1-j]=it%4,it/=4,j++) ;
\r
1472 for (j=1,ndiff=0; j<ns; j++) {
\r
1473 FOR (k,j) if (b[j]==b[k]) break;
\r
1474 if (k==j) ndiff++;
\r
1477 default : SiteCat[h]=0; break;
\r
1478 case (0) : SiteCat[h]=b[0]+1; break;
\r
1480 for (j=1,it=0,n1=0,bit1=0; j<ns; j++) {
\r
1484 if (bit1==0 && k) bit1=ns-1-j;
\r
1487 if (nsep==0) { SiteCat[h]=it; break; }
\r
1489 SiteCat[h]=it+min2(bit1+1,nsep)*11;
\r
1490 if (n1==1 && bit1<nsep) {
\r
1492 SiteCat[h]+=(b[0]*4+b[ns-1-bit1]-b[0]-(b[0]<=b[ns-1-bit1]));
\r
1497 FOR (j, ns) fprintf (fout, "%1c", BASEs[b[j]]);
\r
1498 fprintf (fout, "%5d ", SiteCat[h]);
\r
1499 if (h%4==3) FPN (fout);
\r
1505 int GetPexpML (double x[], int ncat, int SiteCat[], double pexp[])
\r
1507 int j, it, h, nodeb[NNODE];
\r
1508 int isum, nsum=1<<(2*(tree.nbranch-com.ns+1));
\r
1509 double fh, y, Pt[NBRANCH][16];
\r
1511 if (com.ngene>1 || com.nhomo || com.alpha || com.nparK || com.model>REV)
\r
1512 error2 ("Pexp()");
\r
1513 SetParameters (x);
\r
1514 FOR (j, tree.nbranch)
\r
1515 PMatCijk (Pt[j], nodes[tree.branches[j][1]].branch);
\r
1517 for (h=0; h<(1<<2*com.ns); h++) {
\r
1518 if (SiteCat[h] == 0) continue;
\r
1519 for (j=0,it=h; j<com.ns; nodeb[com.ns-1-j]=it&3,it>>=2,j++) ;
\r
1520 for (isum=0,fh=0; isum<nsum; isum++) {
\r
1521 for (j=0,it=isum; j<tree.nbranch-com.ns+1; j++)
\r
1522 { nodeb[com.ns+j]=it%4; it/=4; }
\r
1523 for (j=0,y=com.pi[nodeb[tree.root]]; j<tree.nbranch; j++)
\r
1524 y*=Pt[j][nodeb[tree.branches[j][0]]*4+nodeb[tree.branches[j][1]]];
\r
1527 pexp[SiteCat[h]] += fh;
\r
1529 pexp[0] = 1-sum(pexp+1,ncat-1);
\r
1534 int TestModel (FILE *fout, double x[], int nsep, double space[])
\r
1536 /* test of models, using com.
\r
1538 int j,h, it, ls=com.ls, ncat, *SiteCat;
\r
1539 double *pexp=space, *nobs, lmax0, lnL0, X2, ef, de;
\r
1541 SiteCat=(int*)malloc((1<<2*com.ns)* sizeof(int));
\r
1542 if (SiteCat == NULL) error2 ("oom");
\r
1543 CollapsSite (F0, nsep, com.ns, &ncat, SiteCat);
\r
1544 fprintf (fout, "\n\nAppr. test of model.. ncat%6d nsep%6d\n", ncat,nsep);
\r
1547 zero (pexp, 2*ncat);
\r
1549 FOR (h, com.npatt) {
\r
1550 for (j=0,it=0; j<com.ns; j++) it = it*4+(com.z[j][h]-1);
\r
1551 nobs[SiteCat[it]] += com.fpatt[h];
\r
1553 GetPexpML (x, ncat, SiteCat, pexp);
\r
1555 for (h=0,lnL0=0,X2=0,lmax0=-(double)ls*log((double)ls); h<ncat; h++) {
\r
1557 lmax0 += nobs[h]*log(nobs[h]);
\r
1558 lnL0 += nobs[h]*log(pexp[h]);
\r
1560 ef = com.ls*pexp[h];
\r
1561 de = square(nobs[h]-ef)/ef;
\r
1563 fprintf (fout, "\nCat #%3d%9.0f%9.2f%9.2f", h, nobs[h], ef,de);
\r
1565 fprintf (fout, "\n\nlmax0:%12.4f D2:%12.4f X2:%12.4f\n",
\r
1566 lmax0, 2*(lmax0-lnL0), X2);
\r
1576 int OldDistributions (int inode, double AncientFreqs[])
\r
1578 /* reconstruct nucleotide frequencies at and down inode for nonhomogeneous models com.nhomo==3 or 4.
\r
1579 AncientFreqs[tree.nnode*4]
\r
1582 double kappa=com.kappa;
\r
1584 if (com.alpha || com.model>REV) {
\r
1585 puts("OldDistributions() does not run when alpha > 0 or model >= TN93");
\r
1588 if(inode==tree.root)
\r
1589 xtoy (nodes[inode].pi, AncientFreqs+inode*n, n);
\r
1591 if (com.nhomo>2 && com.model<=TN93)
\r
1592 eigenTN93(com.model, *nodes[inode].pkappa, *(nodes[inode].pkappa+1), nodes[inode].pi, &nR, Root, Cijk);
\r
1593 else if (com.nhomo>2 && com.model==REV)
\r
1594 eigenQREVbase(NULL, PMat, nodes[inode].pkappa, nodes[inode].pi, &nR, Root, Cijk);
\r
1596 PMatCijk (PMat, nodes[inode].branch);
\r
1597 matby (AncientFreqs+nodes[inode].father*n, PMat, AncientFreqs+inode*n, 1, n, n);
\r
1599 for(i=0; i<nodes[inode].nson; i++)
\r
1600 OldDistributions (nodes[inode].sons[i], AncientFreqs);
\r
1608 /* problems and notes
\r
1610 (1) AdG model: generation of com.MK[K*K] is not independent
\r
1611 of com.alpha or DiscreteGamma().
\r
1613 non-homogeneous process models:
\r
1614 nhomo fix_kappa models
\r
1616 0 (1 pi given) 0 (solve 1 kappa) JC69, K80, F81, F84, HKY85,
\r
1617 REV(5), UNREST(11)
\r
1618 1 (1 kappa given) K80, F84, HKY85
\r
1620 1 (solve 1 pi) 0 (as above) F84, HKY85, REV(5)
\r
1621 1 F81(0), F84, HKY85
\r
1623 2 (b kappa's) ? K80, F84, HKY85
\r
1625 3 (ns+2 pi) 0 (solve 1 kappa) F84 & HKY85
\r
1626 1 (nbranch kappa) F84 & HKY85
\r
1628 4 (nnode pi) 0,1 (as above)
\r
1631 space-time process models:
\r
1632 nparK fix_alpha fix_rho parameters
\r
1633 0 (0,1) (0,1) alpha & rho for AdG
\r
1635 1 set to 1 set to 1 rK[] (freqK=1/K) (K-1)
\r
1636 2 set to 1 set to 1 rK[] & freqK[] 2(K-1)
\r
1637 3 set to 1 set to 1 rK[] & MK[] (freqK=1/K) K(K-1)
\r
1638 4 set to 1 set to 1 rK[] & MK[] K*K-1
\r
1641 Local clock models
\r
1642 parameters under local clock (com.clock=2)
\r
1643 com.ntime = (#ancestral nodes) - 1 + (#rates) - 1
\r
1644 Parameters include (ns-1) node times t[] and rates for branches.
\r
1646 x[1..(nid-1)]: ti/t0 ancestral node times expressed as ratios
\r
1647 x[ns-1 .. ntime-1]: rates for branches
\r