7 * Dept. Integrative Biology
8 * University of California, Berkeley
9 * Berkeley, CA 94720-3140
13 * Swedish Museum of Natural History
15 * SE-10405 Stockholm, SWEDEN
16 * fredrik.ronquist@nrm.se
18 * With important contributions by
20 * Paul van der Mark (paulvdm@sc.fsu.edu)
21 * Maxim Teslenko (maxim.teslenko@nrm.se)
23 * and by many users (run 'acknowledgments' to see more info)
25 * This program is free software; you can redistribute it and/or
26 * modify it under the terms of the GNU General Public License
27 * as published by the Free Software Foundation; either version 2
28 * of the License, or (at your option) any later version.
30 * This program is distributed in the hope that it will be useful,
31 * but WITHOUT ANY WARRANTY; without even the implied warranty of
32 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 * GNU General Public License for more details (www.gnu.org).
38 #include "likelihood.h"
43 const char* const svnRevisionLikeliC = "$Rev: 1003 $"; /* Revision keyword which is expanded/updated by svn on each commit/update */
/* Very small positive constant (1.0e-300). NOTE(review): its use is not
   visible in this part of the file - confirm where it is applied. */
45 #define LIKE_EPSILON 1.0e-300
/* Globals declared extern: they are only referenced here, not defined here. */
48 extern int numLocalChains;
49 extern int rateProbRowSize; /* size of rate probs for one chain one state */
50 extern MrBFlt **rateProbs; /* pointers to rate probs used by adgamma model */
52 /* local prototypes */
/* NOTE(review): these prototypes are not declared static, and
   RemoveNodeScalers_SSE is declared unconditionally although the SSE
   routines defined in this file are wrapped in "#if defined (SSE_ENABLED)".
   Confirm that both choices are intended. */
53 void CopySiteScalers (ModelInfo *m, int chain);
54 void FlipCondLikeSpace (ModelInfo *m, int chain, int nodeIndex);
55 void FlipCijkSpace (ModelInfo *m, int chain);
56 void FlipNodeScalerSpace (ModelInfo *m, int chain, int nodeIndex);
57 void FlipSiteScalerSpace (ModelInfo *m, int chain);
58 void FlipTiProbsSpace (ModelInfo *m, int chain, int nodeIndex);
59 MrBFlt GetRate (int division, int chain);
60 int RemoveNodeScalers(TreeNode *p, int division, int chain);
61 int RemoveNodeScalers_SSE(TreeNode *p, int division, int chain);
62 void ResetSiteScalers (ModelInfo *m, int chain);
63 int UpDateCijk (int whichPart, int whichChain);
/* NOTE: because of the trailing "|| 1" this condition is always true, so the
   scalar routine below is compiled whether or not SSE_ENABLED is defined. */
66 #if !defined (SSE_ENABLED) || 1
67 /*----------------------------------------------------------------
69 | CondLikeDown_Bin: binary model with or without rate
72 -----------------------------------------------------------------*/
/* Computes the conditional likelihoods of node p from those of its two
   children for a two-state model.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   For every rate category (m->numGammaCats) and every character
   (m->numChars) two values are appended to the parent array clP. */
73 int CondLikeDown_Bin (TreeNode *p, int division, int chain)
76 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR;
79 /* find model settings for this division */
80 m = &modelSettings[division];
82 /* Flip conditional likelihood space */
83 FlipCondLikeSpace (m, chain, p->index);
85 /* find conditional likelihood pointers */
86 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
87 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
88 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
90 /* find transition probabilities */
91 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
92 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
/* NOTE(review): tiPL/tiPR are presumably initialised from pL/pR before the
   loops and advanced per rate category - confirm. */
96 for (k=0; k<m->numGammaCats; k++)
98 for (c=0; c<m->numChars; c++)
/* first value: ti-prob entries 0 and 1 weight the two child states; the
   left and right sums are multiplied together */
100 *(clP++) = (tiPL[0]*clL[0] + tiPL[1]*clL[1])
101 *(tiPR[0]*clR[0] + tiPR[1]*clR[1]);
/* second value: the same child states weighted by ti-prob entries 2 and 3 */
102 *(clP++) = (tiPL[2]*clL[0] + tiPL[3]*clL[1])
103 *(tiPR[2]*clR[0] + tiPR[3]*clR[1]);
118 #if defined (SSE_ENABLED)
119 /*----------------------------------------------------------------
121 | CondLikeDown_Bin_SSE: binary model with or without rate
124 -----------------------------------------------------------------*/
/* SSE counterpart of CondLikeDown_Bin: the same two sums-of-products per
   child are formed, but the conditional likelihood arrays are accessed as
   __m128 vectors and the character loop runs over m->numSSEChars.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   NOTE(review): each __m128 presumably packs the same state for four
   consecutive characters - confirm against the cond-like layout. */
125 int CondLikeDown_Bin_SSE (TreeNode *p, int division, int chain)
128 CLFlt *pL, *pR, *tiPL, *tiPR;
129 __m128 *clL, *clR, *clP;
130 __m128 m1, m2, m3, m4, m5, m6;
133 m = &modelSettings[division];
135 /* flip state of node so that we are not overwriting old cond likes */
136 FlipCondLikeSpace (m, chain, p->index);
138 /* find conditional likelihood pointers */
/* the CLFlt arrays are reinterpreted as arrays of __m128 */
139 clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
140 clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
141 clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index ]];
143 /* find transition probabilities */
144 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
145 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
149 for (k=0; k<m->numGammaCats; k++)
151 for (c=0; c<m->numSSEChars; c++)
/* first output vector: _mm_load1_ps broadcasts one ti-prob to all lanes;
   m5 accumulates the left sum, m6 the right sum */
153 m1 = _mm_load1_ps (&tiPL[0]);
154 m2 = _mm_load1_ps (&tiPR[0]);
155 m5 = _mm_mul_ps (m1, clL[0]);
156 m6 = _mm_mul_ps (m2, clR[0]);
158 m1 = _mm_load1_ps (&tiPL[1]);
159 m2 = _mm_load1_ps (&tiPR[1]);
160 m3 = _mm_mul_ps (m1, clL[1]);
161 m4 = _mm_mul_ps (m2, clR[1]);
163 m5 = _mm_add_ps (m3, m5);
164 m6 = _mm_add_ps (m4, m6);
/* store left sum times right sum */
166 *clP++ = _mm_mul_ps (m5, m6);
/* second output vector: same pattern with ti-prob entries 2 and 3 */
168 m1 = _mm_load1_ps (&tiPL[2]);
169 m2 = _mm_load1_ps (&tiPR[2]);
170 m5 = _mm_mul_ps (m1, clL[0]);
171 m6 = _mm_mul_ps (m2, clR[0]);
173 m1 = _mm_load1_ps (&tiPL[3]);
174 m2 = _mm_load1_ps (&tiPR[3]);
175 m3 = _mm_mul_ps (m1, clL[1]);
176 m4 = _mm_mul_ps (m2, clR[1]);
178 m5 = _mm_add_ps (m3, m5);
179 m6 = _mm_add_ps (m4, m6);
181 *clP++ = _mm_mul_ps (m5, m6);
194 /*----------------------------------------------------------------
196 | CondLikeDown_Gen: general n-state model with or without rate
199 -----------------------------------------------------------------*/
/* Computes the conditional likelihoods of node p from its two children for
   a model with an arbitrary number of states.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   nObsStates is m->numStates and nStates is m->numModelStates; the two are
   handled separately, so they may differ.
   When a child is terminal and not partially ambiguous, values taken from
   its transition matrix are tabulated once in preLikeL / preLikeR and
   looked up per character instead of recomputing the inner product. */
200 int CondLikeDown_Gen (TreeNode *p, int division, int chain)
202 int a, b, c, h, i, k, j, shortCut, *lState=NULL, *rState=NULL,
203 nObsStates, nStates, nStatesSquared, preLikeJump;
204 CLFlt likeL, likeR, *pL, *pR, *tiPL, *tiPR, *clL, *clR, *clP;
206 # if !defined (DEBUG_NOSHORTCUTS)
210 /* find model settings for this division and nStates, nStatesSquared */
211 m = &modelSettings[division];
212 nObsStates = m->numStates;
213 nStates = m->numModelStates;
214 nStatesSquared = nStates * nStates;
/* number of tabulated entries per rate category before the trailing block
   of nStates entries (see the lookups using preLikeJump+nStates below) */
215 preLikeJump = nObsStates * nStates;
217 /* flip conditional likelihood space */
218 FlipCondLikeSpace (m, chain, p->index);
220 /* find conditional likelihood pointers */
221 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
222 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
223 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
225 /* find transition probabilities */
226 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
227 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
229 /* find likelihoods of site patterns for left branch if terminal */
231 # if !defined (DEBUG_NOSHORTCUTS)
232 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
235 lState = m->termState[p->left->index];
237 for (k=a=0; k<m->numGammaCats; k++)
/* copy column i of the nStates x nStates matrix (stride nStates) for each
   observed state i */
240 for (i=0; i<nObsStates; i++)
241 for (j=i; j<nStatesSquared; j+=nStates)
242 preLikeL[a++] = tiPL[j];
/* when nStates is a multiple of nObsStates, add the columns offset by
   b*nObsStates onto the tabulated values.
   NOTE(review): index a is presumably rewound before each pass - confirm. */
243 for (b=1; b<nStates/nObsStates; b++)
246 for (i=0; i<nObsStates; i++)
248 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
249 preLikeL[a++] += tiPL[j];
253 for (i=0; i<nStates; i++)
/* step to the transition matrix of the next rate category */
255 tiPL += nStatesSquared;
259 /* find likelihoods of site patterns for right branch if terminal */
260 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
263 rState = m->termState[p->right->index];
265 for (k=a=0; k<m->numGammaCats; k++)
268 for (i=0; i<nObsStates; i++)
269 for (j=i; j<nStatesSquared; j+=nStates)
270 preLikeR[a++] = tiPR[j];
271 for (b=1; b<nStates/nObsStates; b++)
274 for (i=0; i<nObsStates; i++)
276 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
277 preLikeR[a++] += tiPR[j];
281 for (i=0; i<nStates; i++)
283 tiPR += nStatesSquared;
/* Four variants of the main loop follow.
   NOTE(review): the variant is presumably selected through shortCut - confirm. */
/* variant 1: no tabulated values, full inner products for both children */
292 for (k=0; k<m->numGammaCats; k++)
294 for (c=0; c<m->numChars; c++)
296 for (i=h=0; i<nStates; i++)
299 for (j=0; j<nStates; j++)
301 likeL += tiPL[h]*clL[j];
302 likeR += tiPR[h++]*clR[j];
304 *(clP++) = likeL * likeR;
309 tiPL += nStatesSquared;
310 tiPR += nStatesSquared;
/* variant 2: left child tabulated (preLikeL), right child computed */
315 for (k=0; k<m->numGammaCats; k++)
317 for (c=0; c<m->numChars; c++)
/* start of this character's entries inside the block of rate category k */
319 a = lState[c] + k*(preLikeJump+nStates);
320 for (i=h=0; i<nStates; i++)
323 for (j=0; j<nStates; j++)
325 likeR += tiPR[h++]*clR[j];
327 *(clP++) = preLikeL[a++] * likeR;
331 tiPR += nStatesSquared;
/* variant 3: right child tabulated (preLikeR), left child computed */
336 for (k=0; k<m->numGammaCats; k++)
338 for (c=0; c<m->numChars; c++)
340 a = rState[c] + k*(preLikeJump+nStates);
341 for (i=h=0; i<nStates; i++)
344 for (j=0; j<nStates; j++)
346 likeL += tiPL[h++]*clL[j];
348 *(clP++) = preLikeR[a++] * likeL;
352 tiPL += nStatesSquared;
/* variant 4: both children tabulated, only table lookups are needed */
356 for (k=0; k<m->numGammaCats; k++)
358 for (c=0; c<m->numChars; c++)
360 a = rState[c] + k*(preLikeJump+nStates);
361 b = lState[c] + k*(preLikeJump+nStates);
362 for (i=0; i<nStates; i++)
364 *(clP++) = preLikeR[a++] * preLikeL[b++];
375 #if defined (SSE_ENABLED)
376 /*----------------------------------------------------------------
378 | CondLikeDown_Gen_SSE: general n-state model with or without rate
381 -----------------------------------------------------------------*/
/* SSE counterpart of CondLikeDown_Gen. Conditional likelihoods are accessed
   as __m128 vectors and the character loops run over m->numSSEChars.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   The tabulated values (preLikeL / preLikeR) stay scalar: for each vector,
   FLOATS_PER_VEC pointers into the table are set up (one per consecutive
   entry of lState / rState) and their values are packed with _mm_set_ps. */
382 int CondLikeDown_Gen_SSE (TreeNode *p, int division, int chain)
384 int c, c1, h, i, j, k, t, shortCut, *lState=NULL, *rState=NULL, nStates, nStatesSquared, nObsStates, preLikeJump;
385 CLFlt *pL, *pR, *tiPL, *tiPR;
386 __m128 *clL, *clR, *clP;
387 __m128 mTiPL, mTiPR, mL, mR, mAcumL, mAcumR;
/* per-lane cursors into the scalar preLikeR / preLikeL tables */
389 CLFlt *preLikeRV[FLOATS_PER_VEC];
390 CLFlt *preLikeLV[FLOATS_PER_VEC];
392 # if !defined (DEBUG_NOSHORTCUTS)
396 /* find model settings for this division and nStates, nStatesSquared */
397 m = &modelSettings[division];
398 nObsStates = m->numStates;
399 nStates = m->numModelStates;
400 nStatesSquared = nStates * nStates;
401 preLikeJump = nObsStates * nStates;
403 /* Flip conditional likelihood space */
404 FlipCondLikeSpace (m, chain, p->index);
406 /* find conditional likelihood pointers */
407 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
408 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
409 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
411 /* find transition probabilities */
412 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
413 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
415 /* find likelihoods of site patterns for left branch if terminal */
417 # if !defined (DEBUG_NOSHORTCUTS)
418 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
421 lState = m->termState[p->left->index];
423 for (k=a=0; k<m->numGammaCats; k++)
/* copy column i of the nStates x nStates matrix for each observed state i */
426 for (i=0; i<nObsStates; i++)
427 for (j=i; j<nStatesSquared; j+=nStates)
428 preLikeL[a++] = tiPL[j];
/* add the columns offset by b*nObsStates onto the tabulated values */
429 for (b=1; b<nStates/nObsStates; b++)
432 for (i=0; i<nObsStates; i++)
434 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
435 preLikeL[a++] += tiPL[j];
439 for (i=0; i<nStates; i++)
441 tiPL += nStatesSquared;
445 /* find likelihoods of site patterns for right branch if terminal */
446 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
449 rState = m->termState[p->right->index];
451 for (k=a=0; k<m->numGammaCats; k++)
454 for (i=0; i<nObsStates; i++)
455 for (j=i; j<nStatesSquared; j+=nStates)
456 preLikeR[a++] = tiPR[j];
457 for (b=1; b<nStates/nObsStates; b++)
460 for (i=0; i<nObsStates; i++)
462 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
463 preLikeR[a++] += tiPR[j];
467 for (i=0; i<nStates; i++)
469 tiPR += nStatesSquared;
/* Four variants of the main loop follow.
   NOTE(review): the variant is presumably selected through shortCut - confirm. */
/* variant 1: no tabulated values, vector inner products for both children */
479 for (k=0; k<m->numGammaCats; k++)
481 for (c=0; c<m->numSSEChars; c++)
483 for (i=h=0; i<nStates; i++)
485 mAcumL = _mm_setzero_ps();
486 mAcumR = _mm_setzero_ps();
487 for (j=0; j<nStates; j++)
/* broadcast one ti-prob to all lanes, multiply by child state j, accumulate */
489 mTiPL = _mm_load1_ps (&tiPL[h]);
490 mTiPR = _mm_load1_ps (&tiPR[h++]);
491 mL = _mm_mul_ps (mTiPL, clL[j]);
492 mR = _mm_mul_ps (mTiPR, clR[j]);
493 mAcumL = _mm_add_ps (mL, mAcumL);
494 mAcumR = _mm_add_ps (mR, mAcumR);
496 *(clP++) = _mm_mul_ps (mAcumL, mAcumR);
501 tiPL += nStatesSquared;
502 tiPR += nStatesSquared;
/* variant 2: left child tabulated (preLikeL), right child computed */
507 for (k=0; k<m->numGammaCats; k++)
509 for (c=t=0; c<m->numSSEChars; c++)
/* t indexes lState per lane; one table cursor is set up per lane */
511 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
513 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
515 for (i=h=0; i<nStates; i++)
517 assert (FLOATS_PER_VEC == 4); /* In the following statement we assume that an SSE register can hold exactly 4 CLFlts. */
/* _mm_set_ps takes its arguments highest element first, so cursor 0 ends
   up in the lowest element; every cursor advances by one entry */
518 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
519 mAcumR = _mm_setzero_ps();
520 for (j=0; j<nStates; j++)
522 mTiPR = _mm_load1_ps (&tiPR[h++]);
523 mR = _mm_mul_ps (mTiPR, clR[j]);
524 mAcumR = _mm_add_ps (mR, mAcumR);
526 *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
530 tiPR += nStatesSquared;
/* variant 3: right child tabulated (preLikeR), left child computed */
535 for (k=0; k<m->numGammaCats; k++)
537 for (c=t=0; c<m->numSSEChars; c++)
539 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
541 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
543 for (i=h=0; i<nStates; i++)
545 assert (FLOATS_PER_VEC == 4); /* In the following statement we assume that an SSE register can hold exactly 4 CLFlts. */
546 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
547 mAcumL = _mm_setzero_ps();
548 for (j=0; j<nStates; j++)
550 mTiPL = _mm_load1_ps (&tiPL[h++]);
551 mL = _mm_mul_ps (mTiPL, clL[j]);
552 mAcumL = _mm_add_ps (mL, mAcumL);
554 *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
558 tiPL += nStatesSquared;
/* variant 4: both children tabulated, only packed table lookups are needed */
562 for (k=0; k<m->numGammaCats; k++)
564 for (c=t=0; c<m->numSSEChars; c++)
566 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
568 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
569 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
571 for (i=0; i<nStates; i++)
573 assert (FLOATS_PER_VEC == 4); /* In the following 2 statements we assume that an SSE register can hold exactly 4 CLFlts. */
574 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
575 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
576 *(clP++) = _mm_mul_ps (mL,mR);
587 /*----------------------------------------------------------------
589 | CondLikeDown_Gen_GibbsGamma: general n-state model with rate
590 | variation modeled using discrete gamma with Gibbs resampling
592 -----------------------------------------------------------------*/
/* Variant of CondLikeDown_Gen in which the main loops run once over the
   characters only (there is no outer loop over rate categories); the
   transition matrix is chosen per character as pL + r*nStatesSquared.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
                and to offset into m->tiIndex
   NOTE(review): r is presumably read from rateCat[c] for each character -
   confirm. */
593 int CondLikeDown_Gen_GibbsGamma (TreeNode *p, int division, int chain)
595 int a, b, c, i, j, r, *rateCat, shortCut, *lState=NULL, *rState=NULL,
596 nObsStates, nStates, nStatesSquared, nGammaCats;
597 CLFlt likeL, likeR, *pL, *pR, *tiPL, *tiPR, *clL, *clR, *clP;
599 # if !defined (DEBUG_NOSHORTCUTS)
603 /* find model settings for this division and nStates, nStatesSquared */
604 m = &modelSettings[division];
605 nObsStates = m->numStates;
606 nStates = m->numModelStates;
607 nStatesSquared = nStates * nStates;
609 /* flip conditional likelihood space */
610 FlipCondLikeSpace (m, chain, p->index);
612 /* find conditional likelihood pointers */
613 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
614 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
615 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
617 /* find transition probabilities */
618 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
619 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
621 /* find rate category index and number of gamma categories */
/* this chain's slice of m->tiIndex starts at chain * m->numChars */
622 rateCat = m->tiIndex + chain * m->numChars;
623 nGammaCats = m->numGammaCats;
625 /* find likelihoods of site patterns for left branch if terminal */
627 # if !defined (DEBUG_NOSHORTCUTS)
628 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
631 lState = m->termState[p->left->index];
633 for (k=a=0; k<nGammaCats; k++)
/* copy column i of the nStates x nStates matrix for each observed state i */
636 for (i=0; i<nObsStates; i++)
637 for (j=i; j<nStatesSquared; j+=nStates)
638 preLikeL[a++] = tiPL[j];
/* add the columns offset by b*nObsStates onto the tabulated values */
639 for (b=1; b<nStates/nObsStates; b++)
642 for (i=0; i<nObsStates; i++)
644 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
645 preLikeL[a++] += tiPL[j];
649 for (i=0; i<nStates; i++)
651 tiPL += nStatesSquared;
655 /* find likelihoods of site patterns for right branch if terminal */
656 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
659 rState = m->termState[p->right->index];
661 for (k=a=0; k<nGammaCats; k++)
664 for (i=0; i<nObsStates; i++)
665 for (j=i; j<nStatesSquared; j+=nStates)
666 preLikeR[a++] = tiPR[j];
667 for (b=1; b<nStates/nObsStates; b++)
670 for (i=0; i<nObsStates; i++)
672 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
673 preLikeR[a++] += tiPR[j];
677 for (i=0; i<nStates; i++)
679 tiPR += nStatesSquared;
/* Four variants of the main loop follow.
   NOTE(review): the variant is presumably selected through shortCut - confirm. */
/* variant 1: no tabulated values, full inner products for both children */
687 for (c=0; c<m->numChars; c++)
/* select the transition matrices of rate category r */
692 tiPL = pL + r*nStatesSquared;
693 tiPR = pR + r*nStatesSquared;
694 for (i=0; i<nStates; i++)
697 for (j=0; j<nStates; j++)
699 likeL += (*tiPL++) * clL[j];
700 likeR += (*tiPR++) * clR[j];
702 *(clP++) = likeL * likeR;
/* variant 2: left child tabulated (preLikeL), right child computed */
712 for (c=0; c<m->numChars; c++)
717 tiPR = pR + r*nStatesSquared;
/* NOTE(review): the per-category stride used here is nStatesSquared+nStates,
   whereas CondLikeDown_Gen indexes the same kind of table with
   nObsStates*nStates+nStates. The two agree only when nObsStates == nStates;
   confirm this routine is never used with nObsStates != nStates. */
718 a = lState[c] + r*(nStatesSquared+nStates);
719 for (i=0; i<nStates; i++)
722 for (j=0; j<nStates; j++)
724 likeR += (*tiPR++)*clR[j];
726 *(clP++) = preLikeL[a++] * likeR;
/* variant 3: right child tabulated (preLikeR), left child computed */
735 for (c=0; c<m->numChars; c++)
740 tiPL = pL + r*nStatesSquared;
741 a = rState[c] + r*(nStatesSquared+nStates);
742 for (i=0; i<nStates; i++)
745 for (j=0; j<nStates; j++)
747 likeL += (*tiPL++)*clL[j];
749 *(clP++) = preLikeR[a++] * likeL;
/* variant 4: both children tabulated, only table lookups are needed */
758 for (c=0; c<m->numChars; c++)
763 a = lState[c] + r*(nStatesSquared+nStates);
764 b = rState[c] + r*(nStatesSquared+nStates);
765 for (i=0; i<nStates; i++)
766 *(clP++) = preLikeL[a++]*preLikeR[b++];
778 /*----------------------------------------------------------------
780 | CondLikeDown_NUC4: 4by4 nucleotide model with or without rate
783 -----------------------------------------------------------------*/
/* Computes the conditional likelihoods of node p from its two children for
   a four-state model, with the four sums written out explicitly.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   The transition matrix is indexed with the symbols AA..TT and the child
   conditional likelihoods with A, C, G, T (all defined outside this block).
   For terminal, not partially ambiguous children the needed values are
   tabulated in preLikeL / preLikeR and looked up per character. */
784 int CondLikeDown_NUC4 (TreeNode *p, int division, int chain)
786 int c, h, i, j, k, shortCut, *lState=NULL, *rState=NULL;
787 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR;
790 m = &modelSettings[division];
792 /* flip space so that we do not overwrite old cond likes */
793 FlipCondLikeSpace (m, chain, p->index);
795 /* find conditional likelihood pointers */
796 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
797 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
798 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
800 /* find transition probabilities */
801 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
802 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
804 /* find likelihoods of site patterns for left branch if terminal */
806 # if !defined (DEBUG_NOSHORTCUTS)
807 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
810 lState = m->termState[p->left->index];
812 for (k=j=0; k<m->numGammaCats; k++)
/* entries 0, 4, 8, 12 form the first column of the 4x4 matrix (stride 4) */
816 preLikeL[j++] = tiPL[0];
817 preLikeL[j++] = tiPL[4];
818 preLikeL[j++] = tiPL[8];
819 preLikeL[j++] = tiPL[12];
829 /* find likelihoods of site patterns for right branch if terminal */
830 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
833 rState = m->termState[p->right->index];
835 for (k=j=0; k<m->numGammaCats; k++)
839 preLikeR[j++] = tiPR[0];
840 preLikeR[j++] = tiPR[4];
841 preLikeR[j++] = tiPR[8];
842 preLikeR[j++] = tiPR[12];
/* Four variants of the main loop follow.
   NOTE(review): the variant is presumably selected through shortCut - confirm. */
/* variant 1: no tabulated values; one product of left and right sums per
   parent state, written at clP[h] with h running over all categories */
858 for (k=h=0; k<m->numGammaCats; k++)
860 for (c=0; c<m->numChars; c++)
862 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
863 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
864 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
865 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
866 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
867 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
868 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
869 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
/* variant 2: left child tabulated (preLikeL), right child computed */
879 for (k=h=0; k<m->numGammaCats; k++)
881 for (c=0; c<m->numChars; c++)
/* the table is indexed with a stride of 20 entries per rate category.
   NOTE(review): presumably 16 matrix entries plus 4 extra entries - confirm. */
883 i = lState[c] + k*20;
884 clP[h++] = preLikeL[i++]
885 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
886 clP[h++] = preLikeL[i++]
887 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
888 clP[h++] = preLikeL[i++]
889 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
890 clP[h++] = preLikeL[i++]
891 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
/* variant 3: right child looked up via rState, left child computed */
899 for (k=h=0; k<m->numGammaCats; k++)
901 for (c=0; c<m->numChars; c++)
903 i = rState[c] + k*20;
904 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
906 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
908 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
910 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
/* variant 4: both children tabulated, only table lookups are needed */
918 for (k=h=0; k<m->numGammaCats; k++)
920 for (c=0; c<m->numChars; c++)
925 clP[h++] = preLikeL[i++]*preLikeR[j++];
926 clP[h++] = preLikeL[i++]*preLikeR[j++];
927 clP[h++] = preLikeL[i++]*preLikeR[j++];
928 clP[h++] = preLikeL[i++]*preLikeR[j++];
937 /*----------------------------------------------------------------
939 | CondLikeDown_NUC4_GibbsGamma: 4by4 nucleotide model with rate
940 | variation approximated using Gibbs sampling of gamma
942 -----------------------------------------------------------------*/
/* Variant of CondLikeDown_NUC4 in which the main loops run once over the
   characters only (there is no outer loop over rate categories) and the
   tabulated values are indexed with r * 20.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
                and to offset into m->tiIndex
   NOTE(review): r is presumably read from rateCat[c] for each character and
   used to select tiPL/tiPR as well - confirm. */
943 int CondLikeDown_NUC4_GibbsGamma (TreeNode *p, int division, int chain)
945 int c, h, i, j, r, *rateCat, shortCut, *lState=NULL, *rState=NULL,
947 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR;
949 # if !defined (DEBUG_NOSHORTCUTS)
953 m = &modelSettings[division];
955 /* flip conditional likelihood space */
956 FlipCondLikeSpace (m, chain, p->index);
958 /* find conditional likelihood pointers */
959 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
960 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
961 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
963 /* find transition probabilities */
964 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
965 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
967 /* find rate category index and number of gamma categories */
/* this chain's slice of m->tiIndex starts at chain * m->numChars */
968 rateCat = m->tiIndex + chain * m->numChars;
969 nGammaCats = m->numGammaCats;
971 /* find likelihoods of site patterns for left branch if terminal */
973 # if !defined (DEBUG_NOSHORTCUTS)
974 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
977 lState = m->termState[p->left->index];
979 for (k=j=0; k<nGammaCats; k++)
/* entries 0, 4, 8, 12 form the first column of the 4x4 matrix (stride 4) */
983 preLikeL[j++] = tiPL[0];
984 preLikeL[j++] = tiPL[4];
985 preLikeL[j++] = tiPL[8];
986 preLikeL[j++] = tiPL[12];
996 /* find likelihoods of site patterns for right branch if terminal */
997 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
1000 rState = m->termState[p->right->index];
1002 for (k=j=0; k<nGammaCats; k++)
1006 preLikeR[j++] = tiPR[0];
1007 preLikeR[j++] = tiPR[4];
1008 preLikeR[j++] = tiPR[8];
1009 preLikeR[j++] = tiPR[12];
/* table entry set to 1.0.
   NOTE(review): presumably part of the block used for ambiguous states - confirm. */
1014 preLikeR[j++] = 1.0;
/* Four variants of the main loop follow.
   NOTE(review): the variant is presumably selected through shortCut - confirm. */
/* variant 1: no tabulated values, explicit sums for both children */
1023 for (c=h=0; c<m->numChars; c++)
1030 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
1031 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
1032 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
1033 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
1034 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
1035 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
1036 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
1037 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
/* variant 2: left child tabulated (preLikeL), right child computed */
1046 for (c=h=0; c<m->numChars; c++)
/* the table is indexed with a stride of 20 entries per rate category */
1052 i = lState[c] + r * 20;
1053 clP[h++] = preLikeL[i++]
1054 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T]);
1055 clP[h++] = preLikeL[i++]
1056 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T]);
1057 clP[h++] = preLikeL[i++]
1058 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T]);
1059 clP[h++] = preLikeL[i++]
1060 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T]);
/* variant 3: right child looked up via rState, left child computed */
1068 for (c=h=0; c<m->numChars; c++)
1074 i = rState[c] + r * 20;
1075 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
1077 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
1079 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
1081 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
/* variant 4: both children tabulated, only table lookups are needed */
1090 for (c=h=0; c<m->numChars; c++)
1095 i = lState[c] + r * 20;
1096 j = rState[c] + r * 20;
1097 clP[h++] = preLikeL[i++]*preLikeR[j++];
1098 clP[h++] = preLikeL[i++]*preLikeR[j++];
1099 clP[h++] = preLikeL[i++]*preLikeR[j++];
1100 clP[h++] = preLikeL[i++]*preLikeR[j++];
1112 #if defined (SSE_ENABLED)
1113 /*----------------------------------------------------------------
1115 | CondLikeDown_NUC4_SSE: 4by4 nucleotide model with or without rate
1116 | variation, using SSE instructions
1118 -----------------------------------------------------------------*/
/* SSE version of the four-state computation. For every rate category and
   every vector of characters (m->numSSEChars) four output vectors are
   stored to clP, one per row group AA.., CA.., GA.., TA.. of the transition
   matrix. Each output is the lane-wise product of a left and a right
   four-term sum.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   Unlike CondLikeDown_NUC4, no tabulated preLike values are used in the
   code shown here. */
1119 int CondLikeDown_NUC4_SSE (TreeNode *p, int division, int chain)
1122 CLFlt *pL, *pR, *tiPL, *tiPR;
1123 __m128 *clL, *clR, *clP;
1124 __m128 m1, m2, m3, m4, m5, m6;
1127 m = &modelSettings[division];
1129 /* flip state of node so that we are not overwriting old cond likes */
1130 FlipCondLikeSpace (m, chain, p->index);
1132 /* find conditional likelihood pointers */
1133 clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1134 clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
1135 clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index ]];
1137 /* find transition probabilities */
1138 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1139 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1143 for (k=0; k<m->numGammaCats; k++)
1145 for (c=0; c<m->numSSEChars; c++)
/* output 1 (AA, AC, AG, AT): m5 accumulates the left sum, m6 the right
   sum; _mm_load1_ps broadcasts one ti-prob to all lanes */
1147 m1 = _mm_load1_ps (&tiPL[AA]);
1148 m2 = _mm_load1_ps (&tiPR[AA]);
1149 m5 = _mm_mul_ps (m1, clL[A]);
1150 m6 = _mm_mul_ps (m2, clR[A]);
1152 m1 = _mm_load1_ps (&tiPL[AC]);
1153 m2 = _mm_load1_ps (&tiPR[AC]);
1154 m3 = _mm_mul_ps (m1, clL[C]);
1155 m4 = _mm_mul_ps (m2, clR[C]);
1156 m5 = _mm_add_ps (m3, m5);
1157 m6 = _mm_add_ps (m4, m6);
1159 m1 = _mm_load1_ps (&tiPL[AG]);
1160 m2 = _mm_load1_ps (&tiPR[AG]);
1161 m3 = _mm_mul_ps (m1, clL[G]);
1162 m4 = _mm_mul_ps (m2, clR[G]);
1163 m5 = _mm_add_ps (m3, m5);
1164 m6 = _mm_add_ps (m4, m6);
1166 m1 = _mm_load1_ps (&tiPL[AT]);
1167 m2 = _mm_load1_ps (&tiPR[AT]);
1168 m3 = _mm_mul_ps (m1, clL[T]);
1169 m4 = _mm_mul_ps (m2, clR[T]);
1170 m5 = _mm_add_ps (m3, m5);
1171 m6 = _mm_add_ps (m4, m6);
/* store left sum times right sum */
1173 *clP++ = _mm_mul_ps (m5, m6);
/* output 2 (CA, CC, CG, CT) */
1175 m1 = _mm_load1_ps (&tiPL[CA]);
1176 m2 = _mm_load1_ps (&tiPR[CA]);
1177 m5 = _mm_mul_ps (m1, clL[A]);
1178 m6 = _mm_mul_ps (m2, clR[A]);
1180 m1 = _mm_load1_ps (&tiPL[CC]);
1181 m2 = _mm_load1_ps (&tiPR[CC]);
1182 m3 = _mm_mul_ps (m1, clL[C]);
1183 m4 = _mm_mul_ps (m2, clR[C]);
1184 m5 = _mm_add_ps (m3, m5);
1185 m6 = _mm_add_ps (m4, m6);
1187 m1 = _mm_load1_ps (&tiPL[CG]);
1188 m2 = _mm_load1_ps (&tiPR[CG]);
1189 m3 = _mm_mul_ps (m1, clL[G]);
1190 m4 = _mm_mul_ps (m2, clR[G]);
1191 m5 = _mm_add_ps (m3, m5);
1192 m6 = _mm_add_ps (m4, m6);
1194 m1 = _mm_load1_ps (&tiPL[CT]);
1195 m2 = _mm_load1_ps (&tiPR[CT]);
1196 m3 = _mm_mul_ps (m1, clL[T]);
1197 m4 = _mm_mul_ps (m2, clR[T]);
1198 m5 = _mm_add_ps (m3, m5);
1199 m6 = _mm_add_ps (m4, m6);
1201 *clP++ = _mm_mul_ps (m5, m6);
/* output 3 (GA, GC, GG, GT) */
1203 m1 = _mm_load1_ps (&tiPL[GA]);
1204 m2 = _mm_load1_ps (&tiPR[GA]);
1205 m5 = _mm_mul_ps (m1, clL[A]);
1206 m6 = _mm_mul_ps (m2, clR[A]);
1208 m1 = _mm_load1_ps (&tiPL[GC]);
1209 m2 = _mm_load1_ps (&tiPR[GC]);
1210 m3 = _mm_mul_ps (m1, clL[C]);
1211 m4 = _mm_mul_ps (m2, clR[C]);
1212 m5 = _mm_add_ps (m3, m5);
1213 m6 = _mm_add_ps (m4, m6);
1215 m1 = _mm_load1_ps (&tiPL[GG]);
1216 m2 = _mm_load1_ps (&tiPR[GG]);
1217 m3 = _mm_mul_ps (m1, clL[G]);
1218 m4 = _mm_mul_ps (m2, clR[G]);
1219 m5 = _mm_add_ps (m3, m5);
1220 m6 = _mm_add_ps (m4, m6);
1222 m1 = _mm_load1_ps (&tiPL[GT]);
1223 m2 = _mm_load1_ps (&tiPR[GT]);
1224 m3 = _mm_mul_ps (m1, clL[T]);
1225 m4 = _mm_mul_ps (m2, clR[T]);
1226 m5 = _mm_add_ps (m3, m5);
1227 m6 = _mm_add_ps (m4, m6);
1229 *clP++ = _mm_mul_ps (m5, m6);
/* output 4 (TA, TC, TG, TT) */
1231 m1 = _mm_load1_ps (&tiPL[TA]);
1232 m2 = _mm_load1_ps (&tiPR[TA]);
1233 m5 = _mm_mul_ps (m1, clL[A]);
1234 m6 = _mm_mul_ps (m2, clR[A]);
1236 m1 = _mm_load1_ps (&tiPL[TC]);
1237 m2 = _mm_load1_ps (&tiPR[TC]);
1238 m3 = _mm_mul_ps (m1, clL[C]);
1239 m4 = _mm_mul_ps (m2, clR[C]);
1240 m5 = _mm_add_ps (m3, m5);
1241 m6 = _mm_add_ps (m4, m6);
1243 m1 = _mm_load1_ps (&tiPL[TG]);
1244 m2 = _mm_load1_ps (&tiPR[TG]);
1245 m3 = _mm_mul_ps (m1, clL[G]);
1246 m4 = _mm_mul_ps (m2, clR[G]);
1247 m5 = _mm_add_ps (m3, m5);
1248 m6 = _mm_add_ps (m4, m6);
1250 m1 = _mm_load1_ps (&tiPL[TT]);
1251 m2 = _mm_load1_ps (&tiPR[TT]);
1252 m3 = _mm_mul_ps (m1, clL[T]);
1253 m4 = _mm_mul_ps (m2, clR[T]);
1254 m5 = _mm_add_ps (m3, m5);
1255 m6 = _mm_add_ps (m4, m6);
1257 *clP++ = _mm_mul_ps (m5, m6);
/* NOTE: because of the trailing "|| 1" this condition is always true, so the
   scalar routine below is compiled whether or not SSE_ENABLED is defined. */
1271 #if !defined (SSE_ENABLED) || 1
1272 /*----------------------------------------------------------------
1274 | CondLikeDown_NY98: codon model with omega variation
1276 -----------------------------------------------------------------*/
/* Computes the conditional likelihoods of node p from its two children,
   looping over m->numOmegaCats categories instead of gamma rate categories.
     p        - node being updated; p->left, p->right and p->index are read
     division - index into modelSettings selecting the ModelInfo to use
     chain    - chain index used to look up the cond-like and ti-prob slots
   Only one state count is used here (nStates = m->numModelStates), so each
   category block of the preLike tables holds nStatesSquared entries copied
   from the transition matrix followed by nStates entries of 1.0. */
1277 int CondLikeDown_NY98 (TreeNode *p, int division, int chain)
1279 int a, b, c, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, nStates, nStatesSquared;
1280 CLFlt likeL, likeR, *pL, *pR, *tiPL, *tiPR, *clL, *clR, *clP;
1283 /* find model settings for this division and nStates, nStatesSquared */
1284 m = &modelSettings[division];
1285 nStates = m->numModelStates;
1286 nStatesSquared = nStates * nStates;
1288 /* Flip conditional likelihood space */
1289 FlipCondLikeSpace (m, chain, p->index);
1291 /* find conditional likelihood pointers */
1292 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1293 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
1294 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
1296 /* find transition probabilities */
1297 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1298 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1300 /* find likelihoods of site patterns for left branch if terminal */
1302 # if !defined (DEBUG_NOSHORTCUTS)
1303 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
1306 lState = m->termState[p->left->index];
1308 for (k=a=0; k<m->numOmegaCats; k++)
/* copy the matrix column by column (column i is read with stride nStates) */
1310 for (i=0; i<nStates; i++)
1311 for (j=i; j<nStatesSquared; j+=nStates)
1312 preLikeL[a++] = tiPL[j];
/* append nStates entries of 1.0.
   NOTE(review): presumably the block selected for ambiguous states - confirm. */
1314 for (i=0; i<nStates; i++)
1315 preLikeL[a++] = 1.0;
/* step to the transition matrix of the next omega category */
1316 tiPL += nStatesSquared;
1320 /* find likelihoods of site patterns for right branch if terminal */
1321 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
1324 rState = m->termState[p->right->index];
1326 for (k=a=0; k<m->numOmegaCats; k++)
1328 for (i=0; i<nStates; i++)
1329 for (j=i; j<nStatesSquared; j+=nStates)
1330 preLikeR[a++] = tiPR[j];
1332 for (i=0; i<nStates; i++)
1333 preLikeR[a++] = 1.0;
1334 tiPR += nStatesSquared;
/* Four variants of the main loop follow.
   NOTE(review): the variant is presumably selected through shortCut - confirm. */
/* variant 1: no tabulated values, full inner products for both children */
1344 for (k=0; k<m->numOmegaCats; k++)
1346 for (c=0; c<m->numChars; c++)
1348 for (i=h=0; i<nStates; i++)
1350 likeL = likeR = 0.0;
1351 for (j=0; j<nStates; j++)
1353 likeL += tiPL[h]*clL[j];
1354 likeR += tiPR[h++]*clR[j];
1356 *(clP++) = likeL * likeR;
1361 tiPL += nStatesSquared;
1362 tiPR += nStatesSquared;
/* variant 2: left child tabulated (preLikeL), right child computed */
1367 for (k=0; k<m->numOmegaCats; k++)
1369 for (c=0; c<m->numChars; c++)
/* start of this character's entries inside the block of category k */
1371 a = lState[c] + k*(nStatesSquared+nStates);
1372 for (i=h=0; i<nStates; i++)
1375 for (j=0; j<nStates; j++)
1377 likeR += tiPR[h++]*clR[j];
1379 *(clP++) = preLikeL[a++] * likeR;
1383 tiPR += nStatesSquared;
/* variant 3: right child tabulated (preLikeR), left child computed */
1388 for (k=0; k<m->numOmegaCats; k++)
1390 for (c=0; c<m->numChars; c++)
1392 a = rState[c] + k*(nStatesSquared+nStates);
1393 for (i=h=0; i<nStates; i++)
1396 for (j=0; j<nStates; j++)
1398 likeL += tiPL[h++]*clL[j];
1400 *(clP++) = preLikeR[a++] * likeL;
1404 tiPL += nStatesSquared;
/* variant 4: both children tabulated, only table lookups are needed */
1408 for (k=0; k<m->numOmegaCats; k++)
1410 for (c=0; c<m->numChars; c++)
1412 a = rState[c] + k*(nStatesSquared+nStates);
1413 b = lState[c] + k*(nStatesSquared+nStates);
1414 for (i=0; i<nStates; i++)
1416 *(clP++) = preLikeR[a++] * preLikeL[b++];
1428 #if defined (SSE_ENABLED)
1429 /*----------------------------------------------------------------
1431 | CondLikeDown_NY98_SSE: codon model with omega variation
1433 -----------------------------------------------------------------*/
/* CondLikeDown_NY98_SSE
 *
 * SSE variant of the conditional likelihood calculation for the codon model
 * with omega variation. For node p it combines the conditional likelihoods of
 * p->left and p->right with the transition probabilities of the two
 * descendant branches and writes the products into the conditional
 * likelihood space of p.
 *
 * p        : node whose conditional likelihoods are written; p->left and
 *            p->right are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups.
 *
 * Conditional likelihoods are addressed as __m128 vectors and the character
 * loops run to m->numSSEChars, so one iteration handles one vector of
 * characters (FLOATS_PER_VEC of them, asserted below to be 4).
 *
 * NOTE(review): brace lines, some declarations, the dispatch among the four
 * loop nests, the pointer advances and the return statement are not present
 * in this listing; the comments below describe the visible lines only.
 */
1434 int CondLikeDown_NY98_SSE (TreeNode *p, int division, int chain)
1436 int c, c1, h, i, j, k, t, shortCut, *lState=NULL, *rState=NULL, nStates, nStatesSquared;
1437 CLFlt *pL, *pR, *tiPL, *tiPR;
1438 __m128 *clL, *clR, *clP;
1439 __m128 mTiPL, mTiPR, mL, mR, mAcumL, mAcumR;
/* one pointer per vector lane into the precomputed tables preLikeR / preLikeL */
1441 CLFlt *preLikeRV[FLOATS_PER_VEC];
1442 CLFlt *preLikeLV[FLOATS_PER_VEC];
1443 # if !defined (DEBUG_NOSHORTCUTS)
1447 /* find model settings for this division and nStates, nStatesSquared */
1448 m = &modelSettings[division];
1449 nStates = m->numModelStates;
1450 nStatesSquared = nStates * nStates;
1452 /* Flip conditional likelihood space */
1453 FlipCondLikeSpace (m, chain, p->index);
1455 /* find conditional likelihood pointers */
1456 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1457 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
1458 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
1460 /* find transition probabilities */
1461 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1462 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1464 /* find likelihoods of site patterns for left branch if terminal */
1466 # if !defined (DEBUG_NOSHORTCUTS)
/* shortcut applies when the left child has no left descendant and is not flagged as partially ambiguous */
1467 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
1470 lState = m->termState[p->left->index];
/* Per omega category preLikeL receives nStates*nStates entries read from tiPL
   with j starting at i and stepping by nStates (i.e. column i of the matrix,
   one column after another), followed by nStates entries equal to 1.0.
   NOTE(review): the 1.0 entries are presumably the slot selected for
   missing/ambiguous data - confirm. */
1472 for (k=a=0; k<m->numOmegaCats; k++)
1474 for (i=0; i<nStates; i++)
1475 for (j=i; j<nStatesSquared; j+=nStates)
1476 preLikeL[a++] = tiPL[j];
1478 for (i=0; i<nStates; i++)
1479 preLikeL[a++] = 1.0;
1480 tiPL += nStatesSquared;
1484 /* find likelihoods of site patterns for right branch if terminal */
1485 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
1488 rState = m->termState[p->right->index];
/* same table layout as preLikeL, built from the right branch matrices */
1490 for (k=a=0; k<m->numOmegaCats; k++)
1492 for (i=0; i<nStates; i++)
1493 for (j=i; j<nStatesSquared; j+=nStates)
1494 preLikeR[a++] = tiPR[j];
1496 for (i=0; i<nStates; i++)
1497 preLikeR[a++] = 1.0;
1498 tiPR += nStatesSquared;
/* Loop nest 1: neither side uses a precomputed table. For every parent state i
   the sums over j of tiPL[h]*clL[j] and tiPR[h]*clR[j] are accumulated (h walks
   the matrix row by row) and their product is stored in clP. */
1508 for (k=0; k<m->numOmegaCats; k++)
1510 for (c=0; c<m->numSSEChars; c++)
1512 for (i=h=0; i<nStates; i++)
1514 mAcumL = _mm_setzero_ps();
1515 mAcumR = _mm_setzero_ps();
1516 for (j=0; j<nStates; j++)
/* broadcast one transition probability to all lanes */
1518 mTiPL = _mm_load1_ps (&tiPL[h]);
1519 mTiPR = _mm_load1_ps (&tiPR[h++]);
1520 mL = _mm_mul_ps (mTiPL, clL[j]);
1521 mR = _mm_mul_ps (mTiPR, clR[j]);
1522 mAcumL = _mm_add_ps (mL, mAcumL);
1523 mAcumR = _mm_add_ps (mR, mAcumR);
1525 *(clP++) = _mm_mul_ps (mAcumL, mAcumR);
/* advance to the matrices of the next omega category */
1530 tiPL += nStatesSquared;
1531 tiPR += nStatesSquared;
/* Loop nest 2: left factor comes from preLikeL, right factor is summed from clR. */
1536 for (k=0; k<m->numOmegaCats; k++)
1538 for (c=t=0; c<m->numSSEChars; c++)
/* t is the running scalar character index; each lane pointer is set to the
   start of that character's entries for omega category k */
1540 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
1542 preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
1544 for (i=h=0; i<nStates; i++)
1546 assert (FLOATS_PER_VEC == 4); /* The following statement assumes that an SSE register holds exactly 4 CLFlt values. */
/* pack one table value per character; _mm_set_ps takes its arguments from the highest lane to the lowest */
1547 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
1548 mAcumR = _mm_setzero_ps();
1549 for (j=0; j<nStates; j++)
1551 mTiPR = _mm_load1_ps (&tiPR[h++]);
1552 mR = _mm_mul_ps (mTiPR, clR[j]);
1553 mAcumR = _mm_add_ps (mR, mAcumR);
1555 *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
1559 tiPR += nStatesSquared;
/* Loop nest 3: right factor comes from preLikeR, left factor is summed from clL. */
1564 for (k=0; k<m->numOmegaCats; k++)
1566 for (c=t=0; c<m->numSSEChars; c++)
1568 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
1570 preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
1572 for (i=h=0; i<nStates; i++)
1574 assert (FLOATS_PER_VEC == 4); /* The following statement assumes that an SSE register holds exactly 4 CLFlt values. */
1575 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
1576 mAcumL = _mm_setzero_ps();
1577 for (j=0; j<nStates; j++)
1579 mTiPL = _mm_load1_ps (&tiPL[h++]);
1580 mL = _mm_mul_ps (mTiPL, clL[j]);
1581 mAcumL = _mm_add_ps (mL, mAcumL);
1583 *(clP++) = _mm_mul_ps (mAcumL,mAcumR);
1587 tiPL += nStatesSquared;
/* Loop nest 4: both factors come from the precomputed tables; no summation is needed. */
1591 for (k=0; k<m->numOmegaCats; k++)
1593 for (c=t=0; c<m->numSSEChars; c++)
1595 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
1597 preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
1598 preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
1600 for (i=0; i<nStates; i++)
1602 assert (FLOATS_PER_VEC == 4); /* The following 2 statements assume that an SSE register holds exactly 4 CLFlt values. */
1603 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
1604 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
1605 *(clP++) = _mm_mul_ps (mL,mR);
1617 /*----------------------------------------------------------------
1619 | CondLikeDown_Std: variable number of states model
1620 | with or without rate variation
1622 -----------------------------------------------------------------*/
/* CondLikeDown_Std
 *
 * Conditional likelihood calculation for the variable-number-of-states model,
 * with or without rate variation. For node p it multiplies, state by state,
 * the sums obtained from the left child (clL with tiPL) and from the right
 * child (clR with tiPR) and stores the products in clP.
 *
 * p        : node whose conditional likelihoods are written; p->left and
 *            p->right are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups.
 *
 * The number of states is read per character from m->nStates[c], and the
 * start of each character's transition matrices from m->tiIndex[c].
 *
 * NOTE(review): brace lines, the declaration of m, the test that guards the
 * nCats assignment, the pointer advances at the end of the j loop and the
 * return statement are not present in this listing.
 */
1623 int CondLikeDown_Std (TreeNode *p, int division, int chain)
1625 int a, c, h, i, j, k, nStates, nCats, tmp;
1626 CLFlt *clL, *clR, *clP, *pL, *pR, *tiPL, *tiPR, likeL, likeR;
1629 m = &modelSettings[division];
1631 /* Flip conditional likelihood space */
1632 FlipCondLikeSpace (m, chain, p->index);
1634 /* find conditional likelihood pointers */
1635 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1636 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
1637 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
1639 /* find transition probabilities */
1640 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1641 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1643 /* Conditional likelihood space is assumed to be arranged in numGammaCats blocks of data. Each block contains all data for one gamma category.
1644 Each gamma category block consists of numChars sequences of data, each of which corresponds to one character of the data matrix.
1645 A sequence has length nStates for all non-binary data; for binary data its length is nStates*numBetaCats (i.e. 2*numBetaCats) */
1647 /* calculate ancestral probabilities */
/* h is the running write index into clP and is never reset between gamma categories */
1648 for (k=h=0; k<m->numGammaCats; k++)
1650 /* calculate ancestral probabilities */
1651 for (c=0; c<m->numChars; c++)
1653 nStates = m->nStates[c];
1655 /* the following lines ensure that nCats is 1 unless */
1656 /* the character is binary and beta categories are used */
/* NOTE(review): the condition selecting between this assignment and nCats = 1 is not visible here */
1658 nCats = m->numBetaCats;
1662 tmp = k*nStates*nStates; /* tmp is the offset that skips the matrices of the gamma categories already processed */
1663 tiPL = pL + m->tiIndex[c] + tmp;
1664 tiPR = pR + m->tiIndex[c] + tmp;
1665 tmp = (m->numGammaCats-1)*2*2; /* tmp is now the size of the block of transition probability matrices across all gamma categories (minus one) for a single beta category. It is used further on only if the character is binary, to jump to the next beta category */
1667 for (j=0; j<nCats;j++)
/* a indexes the state at node p; tiPL/tiPR are read sequentially, nStates entries per value of a */
1669 for (a=0; a<nStates; a++)
1671 likeL = likeR = 0.0;
1672 for (i=0; i<nStates; i++)
1674 likeL += *(tiPL++) * clL[i];
1675 likeR += *(tiPR++) * clR[i];
1677 clP[h++] = likeL * likeR;
1692 #if !defined (SSE_ENABLED) || 1
1693 /*----------------------------------------------------------------
1695 | CondLikeRoot_Bin: binary model with or without rate
1698 -----------------------------------------------------------------*/
/* CondLikeRoot_Bin
 *
 * Conditional likelihood calculation for the binary model at a node with
 * three neighbours. For node p the result is the product of three factors:
 * one from p->left (clL with pL), one from p->right (clR with pR) and one
 * from p->anc (clA with pA, where pA is looked up with p's own index).
 *
 * p        : node whose conditional likelihoods are written; p->left,
 *            p->right and p->anc are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups.
 *
 * NOTE(review): brace lines, the declarations of c, k and m, the
 * initialisation of tiPL/tiPR/tiPA, the pointer advances and the return
 * statement are not present in this listing.
 */
1699 int CondLikeRoot_Bin (TreeNode *p, int division, int chain)
1702 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
1705 /* find model settings for this division */
1706 m = &modelSettings[division];
1708 /* flip state of node so that we are not overwriting old cond likes */
1709 FlipCondLikeSpace (m, chain, p->index);
1711 /* find conditional likelihood pointers */
1712 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1713 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
1714 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
1715 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
1717 /* find transition probabilities (or calculate instead) */
1718 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1719 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1720 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
/* Two values are written per character and gamma category: the first uses
   matrix entries [0],[1] and the second entries [2],[3] of each 2x2 matrix. */
1725 for (k=0; k<m->numGammaCats; k++)
1727 for (c=0; c<m->numChars; c++)
1729 *(clP++) = (tiPL[0]*clL[0] + tiPL[1]*clL[1])
1730 *(tiPR[0]*clR[0] + tiPR[1]*clR[1])
1731 *(tiPA[0]*clA[0] + tiPA[1]*clA[1]);
1732 *(clP++) = (tiPL[2]*clL[0] + tiPL[3]*clL[1])
1733 *(tiPR[2]*clR[0] + tiPR[3]*clR[1])
1734 *(tiPA[2]*clA[0] + tiPA[3]*clA[1]);
1750 #if defined (SSE_ENABLED)
1751 /*----------------------------------------------------------------
1753 | CondLikeRoot_Bin_SSE:binary model with or without rate
1756 -----------------------------------------------------------------*/
/* CondLikeRoot_Bin_SSE
 *
 * SSE variant of the binary-model conditional likelihood calculation at a
 * node with three neighbours. The result for node p is the product of a left
 * factor (pL), a right factor (pR) and an ancestral factor (pA, looked up
 * with p's own index and combined with the conditional likelihoods of
 * p->anc). Conditional likelihoods are addressed as __m128 vectors and the
 * character loop runs to m->numSSEChars.
 *
 * p        : node whose conditional likelihoods are written; p->left,
 *            p->right and p->anc are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups.
 *
 * NOTE(review): the statements that assign m5 before each group of
 * multiplications are not present in this listing (presumably loads of the
 * next conditional likelihood vector - confirm), nor are the brace lines,
 * the initialisation of tiPL/tiPR/tiPA, the pointer advances and the return
 * statement.
 */
1757 int CondLikeRoot_Bin_SSE (TreeNode *p, int division, int chain)
1760 CLFlt *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
1761 __m128 *clL, *clR, *clP, *clA;
1762 __m128 m1, m2, m3, m4, m5, m6, m7;
1765 m = &modelSettings[division];
1767 /* flip state of node so that we are not overwriting old cond likes */
1768 FlipCondLikeSpace (m, chain, p->index);
1770 /* find conditional likelihood pointers */
1771 clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1772 clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
1773 clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index ]];
1774 clA = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
1776 /* find transition probabilities */
1777 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1778 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1779 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
1784 for (k=0; k<m->numGammaCats; k++)
1786 for (c=0; c<m->numSSEChars; c++)
/* left factor: each _mm_load1_ps broadcasts one matrix entry to all lanes */
1788 m1 = _mm_load1_ps (&tiPL[0]);
1790 m2 = _mm_mul_ps (m1, m5);
1791 m1 = _mm_load1_ps (&tiPL[2]);
1792 m6 = _mm_mul_ps (m1, m5);
1794 m1 = _mm_load1_ps (&tiPL[1]);
1796 m3 = _mm_mul_ps (m1, m5);
1797 m1 = _mm_load1_ps (&tiPL[3]);
1798 m5 = _mm_mul_ps (m1, m5);
1800 m4 = _mm_add_ps (m2, m3); /* in m4 we get (tiPL[0]*clL[0] + tiPL[1]*clL[1]) */
1801 m6 = _mm_add_ps (m5, m6); /* in m6 we get (tiPL[2]*clL[0] + tiPL[3]*clL[1]) */
/* right factor, same pattern with tiPR */
1803 m1 = _mm_load1_ps (&tiPR[0]);
1805 m2 = _mm_mul_ps (m1, m5);
1806 m1 = _mm_load1_ps (&tiPR[2]);
1807 m7 = _mm_mul_ps (m1, m5);
1809 m1 = _mm_load1_ps (&tiPR[1]);
1811 m3 = _mm_mul_ps (m1, m5);
1812 m1 = _mm_load1_ps (&tiPR[3]);
1813 m5 = _mm_mul_ps (m1, m5);
1815 m1 = _mm_add_ps (m2, m3); /* in m1 we get (tiPR[0]*clR[0] + tiPR[1]*clR[1]) */
1816 m7 = _mm_add_ps (m5, m7); /* in m7 we get (tiPR[2]*clR[0] + tiPR[3]*clR[1]) */
1818 m4 = _mm_mul_ps (m1, m4); /* in m4 we get (tiPL[0]*clL[0] + tiPL[1]*clL[1])*(tiPR[0]*clR[0] + tiPR[1]*clR[1]) */
1819 m7 = _mm_mul_ps (m6, m7); /* in m7 we get (tiPL[2]*clL[0] + tiPL[3]*clL[1])*(tiPR[2]*clR[0] + tiPR[3]*clR[1]) */
/* ancestral factor, same pattern with tiPA */
1821 m1 = _mm_load1_ps (&tiPA[0]);
1823 m2 = _mm_mul_ps (m1, m5);
1824 m1 = _mm_load1_ps (&tiPA[2]);
1825 m6 = _mm_mul_ps (m1, m5);
1827 m1 = _mm_load1_ps (&tiPA[1]);
1829 m3 = _mm_mul_ps (m1, m5);
1830 m1 = _mm_load1_ps (&tiPA[3]);
1831 m1 = _mm_mul_ps (m1, m5);
1833 m2 = _mm_add_ps (m2, m3); /* in m2 we get (tiPA[0]*clA[0] + tiPA[1]*clA[1]) */
1834 m1 = _mm_add_ps (m1, m6); /* in m1 we get (tiPA[2]*clA[0] + tiPA[3]*clA[1]) */
/* first stored vector: product of the three sums built from entries [0],[1]; second: from entries [2],[3] */
1836 *clP++ = _mm_mul_ps (m2, m4);
1837 *clP++ = _mm_mul_ps (m1, m7);
1851 /*----------------------------------------------------------------
1853 | CondLikeRoot_Gen: general n-state model with or without rate
1856 -----------------------------------------------------------------*/
/* CondLikeRoot_Gen
 *
 * Conditional likelihood calculation for the general n-state model at a node
 * with three neighbours. For node p the result is the product of a left
 * factor (p->left, pL), a right factor (p->right, pR) and an ancestral factor
 * (p->anc, with pA looked up under p's own index). Each factor is either
 * summed from the neighbour's conditional likelihoods or read from a
 * precomputed table (preLikeL / preLikeR / preLikeA) indexed by the
 * neighbour's termState entry.
 *
 * p        : node whose conditional likelihoods are written; p->left,
 *            p->right and p->anc are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups.
 *
 * nObsStates is m->numStates and nStates is m->numModelStates; the tables are
 * addressed with a per-category stride of preLikeJump+nStates, where
 * preLikeJump = nObsStates*nStates.
 *
 * NOTE(review): brace lines, some declarations, the updates of shortCut, the
 * dispatch among the five loop nests, the pointer advances and the return
 * statement are not present in this listing; the comments below describe the
 * visible lines only.
 */
1857 int CondLikeRoot_Gen (TreeNode *p, int division, int chain)
1859 int a, b, c, d, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
1860 nObsStates, nStates, nStatesSquared, preLikeJump;
1861 CLFlt likeL, likeR, likeA, *clL, *clR, *clP, *clA, *pL, *pR, *pA,
1862 *tiPL, *tiPR, *tiPA;
1864 # if !defined (DEBUG_NOSHORTCUTS)
1868 /* find model settings for this division and nStates, nStatesSquared */
1869 m = &modelSettings[division];
1870 nObsStates = m->numStates;
1871 nStates = m->numModelStates;
1872 nStatesSquared = nStates * nStates;
1873 preLikeJump = nObsStates * nStates;
1875 /* flip state of node so that we are not overwriting old cond likes */
1876 FlipCondLikeSpace (m, chain, p->index);
1878 /* find conditional likelihood pointers */
1879 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
1880 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
1881 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
1882 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
1884 /* find transition probabilities (or calculate instead) */
1885 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
1886 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
1887 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
1889 /* find likelihoods of site patterns for left branch if terminal */
1891 # if !defined (DEBUG_NOSHORTCUTS)
1892 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
1895 lState = m->termState[p->left->index];
/* Table layout per gamma category: the first pass copies, for each of the
   nObsStates observed states i, the nStates matrix entries found at
   j = i, i+nStates, ... (nObsStates*nStates entries in total); the pass over b
   adds the entries at column offsets b*nObsStates; finally nStates entries of
   1.0 are appended.
   NOTE(review): the += pass presumably revisits the entries written by the
   first pass, but the statement resetting a is not visible here - confirm. */
1897 for (k=a=0; k<m->numGammaCats; k++)
1900 for (i=0; i<nObsStates; i++)
1901 for (j=i; j<nStatesSquared; j+=nStates)
1902 preLikeL[a++] = tiPL[j];
1903 for (b=1; b<nStates/nObsStates; b++)
1906 for (i=0; i<nObsStates; i++)
1908 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
1909 preLikeL[a++] += tiPL[j];
1913 for (i=0; i<nStates; i++)
1914 preLikeL[a++] = 1.0;
1915 tiPL += nStatesSquared;
1919 /* find likelihoods of site patterns for right branch if terminal */
1920 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
1923 rState = m->termState[p->right->index];
/* same table layout as preLikeL, built from the right branch matrices */
1925 for (k=a=0; k<m->numGammaCats; k++)
1928 for (i=0; i<nObsStates; i++)
1929 for (j=i; j<nStatesSquared; j+=nStates)
1930 preLikeR[a++] = tiPR[j];
1931 for (b=1; b<nStates/nObsStates; b++)
1934 for (i=0; i<nObsStates; i++)
1936 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
1937 preLikeR[a++] += tiPR[j];
1941 for (i=0; i<nStates; i++)
1942 preLikeR[a++] = 1.0;
1943 tiPR += nStatesSquared;
1947 /* find likelihoods of site patterns for anc branch, always terminal */
/* NOTE(review): what happens in the YES branch, and whether the lines below sit in an else branch, is not visible here */
1948 if (m->isPartAmbig[p->anc->index] == YES)
1954 aState = m->termState[p->anc->index];
/* same table layout as preLikeL, built from pA */
1956 for (k=a=0; k<m->numGammaCats; k++)
1959 for (i=0; i<nObsStates; i++)
1960 for (j=i; j<nStatesSquared; j+=nStates)
1961 preLikeA[a++] = tiPA[j];
1962 for (b=1; b<nStates/nObsStates; b++)
1965 for (i=0; i<nObsStates; i++)
1967 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
1968 preLikeA[a++] += tiPA[j];
1972 for (i=0; i<nStates; i++)
1973 preLikeA[a++] = 1.0;
1974 tiPA += nStatesSquared;
/* Loop nest 1: all three factors are summed from conditional likelihoods;
   h walks each nStates x nStates matrix row by row. */
1988 for (k=0; k<m->numGammaCats; k++)
1990 for (c=0; c<m->numChars; c++)
1992 for (i=h=0; i<nStates; i++)
1994 likeL = likeR = likeA = 0.0;
1995 for (j=0; j<nStates; j++)
1997 likeL += tiPL[h]*clL[j];
1998 likeR += tiPR[h]*clR[j];
1999 likeA += tiPA[h++]*clA[j];
2001 *(clP++) = likeL * likeR * likeA;
2007 tiPL += nStatesSquared;
2008 tiPR += nStatesSquared;
2009 tiPA += nStatesSquared;
/* Loop nest 2: ancestral factor from preLikeA; left and right factors summed. */
2015 for (k=0; k<m->numGammaCats; k++)
2017 for (c=0; c<m->numChars; c++)
/* start of character c's entries in the table block of gamma category k */
2019 a = aState[c] + k*(preLikeJump+nStates);
2020 for (i=h=0; i<nStates; i++)
2022 likeR = likeL = 0.0;
2023 for (j=0; j<nStates; j++)
2025 likeR += tiPR[h]*clR[j];
2026 likeL += tiPL[h++]*clL[j];
2028 *(clP++) = preLikeA[a++] * likeR * likeL;
2033 tiPR += nStatesSquared;
2034 tiPL += nStatesSquared;
/* Loop nest 3: left and ancestral factors from tables; right factor summed. */
2039 for (k=0; k<m->numGammaCats; k++)
2041 for (c=0; c<m->numChars; c++)
2043 a = lState[c] + k*(preLikeJump+nStates);
2044 b = aState[c] + k*(preLikeJump+nStates);
2045 for (i=h=0; i<nStates; i++)
2048 for (j=0; j<nStates; j++)
2050 likeR += tiPR[h++]*clR[j];
2052 *(clP++) = preLikeL[a++] * preLikeA[b++] * likeR;
2056 tiPR += nStatesSquared;
/* Loop nest 4: right and ancestral factors from tables; left factor summed. */
2061 for (k=0; k<m->numGammaCats; k++)
2063 for (c=0; c<m->numChars; c++)
2065 a = rState[c] + k*(preLikeJump+nStates);
2066 b = aState[c] + k*(preLikeJump+nStates);
2067 for (i=h=0; i<nStates; i++)
2070 for (j=0; j<nStates; j++)
2072 likeL += tiPL[h++]*clL[j];
2074 *(clP++) = preLikeR[a++] * preLikeA[b++] * likeL;
2078 tiPL += nStatesSquared;
/* Loop nest 5: all three factors from tables; no summation is needed. */
2082 for (k=0; k<m->numGammaCats; k++)
2084 for (c=0; c<m->numChars; c++)
2086 a = rState[c] + k*(preLikeJump+nStates);
2087 b = lState[c] + k*(preLikeJump+nStates);
2088 d = aState[c] + k*(preLikeJump+nStates);
2089 for (i=0; i<nStates; i++)
2091 *(clP++) = preLikeR[a++] * preLikeL[b++] * preLikeA[d++];
2102 #if defined (SSE_ENABLED)
2103 /*----------------------------------------------------------------
2105 | CondLikeRoot_Gen_SSE:general n-state model with or without rate
2108 -----------------------------------------------------------------*/
/* CondLikeRoot_Gen_SSE
 *
 * SSE variant of the general n-state conditional likelihood calculation at a
 * node with three neighbours. For node p the result is the product of a left
 * factor (p->left, pL), a right factor (p->right, pR) and an ancestral factor
 * (p->anc, with pA looked up under p's own index). Each factor is either
 * summed from the neighbour's conditional likelihood vectors or gathered from
 * a precomputed table (preLikeL / preLikeR / preLikeA) indexed by the
 * neighbour's termState entries.
 *
 * p        : node whose conditional likelihoods are written; p->left,
 *            p->right and p->anc are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups.
 *
 * Conditional likelihoods are addressed as __m128 vectors and the character
 * loops run to m->numSSEChars, so one iteration handles one vector of
 * characters (FLOATS_PER_VEC of them, asserted below to be 4).
 *
 * NOTE(review): brace lines, some declarations (a and b are used below but
 * not declared in the visible lines), the updates of shortCut, the dispatch
 * among the five loop nests, the pointer advances and the return statement
 * are not present in this listing.
 */
2109 int CondLikeRoot_Gen_SSE (TreeNode *p, int division, int chain)
2111 int c, c1, t, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL, nObsStates, preLikeJump,
2112 nStates, nStatesSquared;
2113 CLFlt *pL, *pR, *pA,
2114 *tiPL, *tiPR, *tiPA;
2115 __m128 *clL, *clR, *clP, *clA;
2116 __m128 mTiPL, mTiPR, mTiPA, mL, mR, mA, mAcumL, mAcumR, mAcumA;
/* one pointer per vector lane into the precomputed tables */
2118 CLFlt *preLikeRV[FLOATS_PER_VEC];
2119 CLFlt *preLikeLV[FLOATS_PER_VEC];
2120 CLFlt *preLikeAV[FLOATS_PER_VEC];
2122 # if !defined (DEBUG_NOSHORTCUTS)
2126 /* find model settings for this division and nStates, nStatesSquared */
2127 m = &modelSettings[division];
2128 nObsStates = m->numStates;
2129 nStates = m->numModelStates;
2130 nStatesSquared = nStates * nStates;
/* number of table entries written per gamma category before the trailing 1.0 entries */
2131 preLikeJump = nObsStates * nStates;
2133 /* flip state of node so that we are not overwriting old cond likes */
2134 FlipCondLikeSpace (m, chain, p->index);
2136 /* find conditional likelihood pointers */
2137 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2138 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
2139 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
2140 clA = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2142 /* find transition probabilities (or calculate instead) */
2143 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2144 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2145 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2147 /* find likelihoods of site patterns for left branch if terminal */
2149 # if !defined (DEBUG_NOSHORTCUTS)
2150 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2153 lState = m->termState[p->left->index];
/* Table layout per gamma category: the first pass copies, for each of the
   nObsStates observed states i, the nStates matrix entries found at
   j = i, i+nStates, ...; the pass over b adds the entries at column offsets
   b*nObsStates; finally nStates entries of 1.0 are appended.
   NOTE(review): the += pass presumably revisits the entries written by the
   first pass, but the statement resetting a is not visible here - confirm. */
2155 for (k=a=0; k<m->numGammaCats; k++)
2158 for (i=0; i<nObsStates; i++)
2159 for (j=i; j<nStatesSquared; j+=nStates)
2160 preLikeL[a++] = tiPL[j];
2161 for (b=1; b<nStates/nObsStates; b++)
2164 for (i=0; i<nObsStates; i++)
2166 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2167 preLikeL[a++] += tiPL[j];
2171 for (i=0; i<nStates; i++)
2172 preLikeL[a++] = 1.0;
2173 tiPL += nStatesSquared;
2177 /* find likelihoods of site patterns for right branch if terminal */
2178 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2181 rState = m->termState[p->right->index];
/* same table layout as preLikeL, built from the right branch matrices */
2183 for (k=a=0; k<m->numGammaCats; k++)
2186 for (i=0; i<nObsStates; i++)
2187 for (j=i; j<nStatesSquared; j+=nStates)
2188 preLikeR[a++] = tiPR[j];
2189 for (b=1; b<nStates/nObsStates; b++)
2192 for (i=0; i<nObsStates; i++)
2194 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2195 preLikeR[a++] += tiPR[j];
2199 for (i=0; i<nStates; i++)
2200 preLikeR[a++] = 1.0;
2201 tiPR += nStatesSquared;
2205 /* find likelihoods of site patterns for anc branch, always terminal */
/* NOTE(review): what happens in the YES branch, and whether the lines below sit in an else branch, is not visible here */
2206 if (m->isPartAmbig[p->anc->index] == YES)
2212 aState = m->termState[p->anc->index];
/* same table layout as preLikeL, built from pA */
2214 for (k=a=0; k<m->numGammaCats; k++)
2217 for (i=0; i<nObsStates; i++)
2218 for (j=i; j<nStatesSquared; j+=nStates)
2219 preLikeA[a++] = tiPA[j];
2220 for (b=1; b<nStates/nObsStates; b++)
2223 for (i=0; i<nObsStates; i++)
2225 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2226 preLikeA[a++] += tiPA[j];
2230 for (i=0; i<nStates; i++)
2231 preLikeA[a++] = 1.0;
2232 tiPA += nStatesSquared;
/* Loop nest 1: all three factors are summed from conditional likelihood
   vectors; h walks each nStates x nStates matrix row by row. */
2245 for (k=0; k<m->numGammaCats; k++)
2247 for (c=0; c<m->numSSEChars; c++)
2249 for (i=h=0; i<nStates; i++)
2251 mAcumL = _mm_setzero_ps();
2252 mAcumR = _mm_setzero_ps();
2253 mAcumA = _mm_setzero_ps();
2254 for (j=0; j<nStates; j++)
/* broadcast one transition probability to all lanes */
2256 mTiPL = _mm_load1_ps (&tiPL[h]);
2257 mTiPR = _mm_load1_ps (&tiPR[h]);
2258 mTiPA = _mm_load1_ps (&tiPA[h++]);
2259 mL = _mm_mul_ps (mTiPL, clL[j]);
2260 mR = _mm_mul_ps (mTiPR, clR[j]);
2261 mA = _mm_mul_ps (mTiPA, clA[j]);
2262 mAcumL = _mm_add_ps (mL, mAcumL);
2263 mAcumR = _mm_add_ps (mR, mAcumR);
2264 mAcumA = _mm_add_ps (mA, mAcumA);
2266 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2267 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
2273 tiPL += nStatesSquared;
2274 tiPR += nStatesSquared;
2275 tiPA += nStatesSquared;
/* Loop nest 2: ancestral factor gathered from preLikeA; left and right factors summed. */
2281 for (k=0; k<m->numGammaCats; k++)
2283 for (c=t=0; c<m->numSSEChars; c++)
/* t is the running scalar character index; each lane pointer is set to the
   start of that character's entries for gamma category k */
2285 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
2287 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2289 for (i=h=0; i<nStates; i++)
2291 assert (FLOATS_PER_VEC == 4); /* The following statement assumes that an SSE register holds exactly 4 CLFlt values. */
/* pack one table value per character; _mm_set_ps takes its arguments from the highest lane to the lowest */
2292 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2293 mAcumL = _mm_setzero_ps();
2294 mAcumR = _mm_setzero_ps();
2295 for (j=0; j<nStates; j++)
2297 mTiPL = _mm_load1_ps (&tiPL[h]);
2298 mL = _mm_mul_ps (mTiPL, clL[j]);
2299 mAcumL = _mm_add_ps (mL, mAcumL);
2300 mTiPR = _mm_load1_ps (&tiPR[h++]);
2301 mR = _mm_mul_ps (mTiPR, clR[j]);
2302 mAcumR = _mm_add_ps (mR, mAcumR);
2304 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2305 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
2310 tiPL += nStatesSquared;
2311 tiPR += nStatesSquared;
/* Loop nest 3: left and ancestral factors gathered from tables; right factor summed. */
2316 for (k=0; k<m->numGammaCats; k++)
2318 for (c=t=0; c<m->numSSEChars; c++)
2320 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
2322 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
2323 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2325 for (i=h=0; i<nStates; i++)
2327 assert (FLOATS_PER_VEC == 4); /* The following statements assume that an SSE register holds exactly 4 CLFlt values. */
2328 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
2329 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2330 mAcumR = _mm_setzero_ps();
2331 for (j=0; j<nStates; j++)
2333 mTiPR = _mm_load1_ps (&tiPR[h++]);
2334 mR = _mm_mul_ps (mTiPR, clR[j]);
2335 mAcumR = _mm_add_ps (mR, mAcumR);
2337 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2338 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
2342 tiPR += nStatesSquared;
/* Loop nest 4: right and ancestral factors gathered from tables; left factor summed. */
2347 for (k=0; k<m->numGammaCats; k++)
2349 for (c=t=0; c<m->numSSEChars; c++)
2351 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
2353 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
2354 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2356 for (i=h=0; i<nStates; i++)
2358 assert (FLOATS_PER_VEC == 4); /* The following statements assume that an SSE register holds exactly 4 CLFlt values. */
2359 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
2360 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2361 mAcumL = _mm_setzero_ps();
2362 for (j=0; j<nStates; j++)
2364 mTiPL = _mm_load1_ps (&tiPL[h++]);
2365 mL = _mm_mul_ps (mTiPL, clL[j]);
2366 mAcumL = _mm_add_ps (mL, mAcumL);
2368 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
2369 *(clP++) = _mm_mul_ps (mAcumL,mAcumA);
2373 tiPL += nStatesSquared;
/* Loop nest 5: all three factors gathered from tables; no summation is needed. */
2377 for (k=0; k<m->numGammaCats; k++)
2379 for (c=t=0; c<m->numSSEChars; c++)
2381 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
2383 preLikeRV[c1] = &preLikeR[rState[t] + k*(preLikeJump+nStates)];
2384 preLikeLV[c1] = &preLikeL[lState[t] + k*(preLikeJump+nStates)];
2385 preLikeAV[c1] = &preLikeA[aState[t] + k*(preLikeJump+nStates)];
2387 for (i=0; i<nStates; i++)
2389 assert (FLOATS_PER_VEC == 4); /* The following 3 statements assume that an SSE register holds exactly 4 CLFlt values. */
2390 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
2391 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
2392 mA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
2393 mL = _mm_mul_ps (mL,mR);
2394 *(clP++) = _mm_mul_ps (mL,mA);
2406 /*----------------------------------------------------------------
2408 | CondLikeRoot_Gen_GibbsGamma: general n-state model with rate
2409 | variation modeled using a discrete gamma distribution with
2410 | Gibbs resampling of rate categories
2412 -----------------------------------------------------------------*/
/* CondLikeRoot_Gen_GibbsGamma
 *
 * Conditional likelihood calculation for the general n-state model at a node
 * with three neighbours, for the case where rate variation is handled by
 * Gibbs resampling of rate categories. Unlike the other general-model
 * routines there is no loop over gamma categories in the main calculation:
 * each character c uses the single set of transition matrices selected by r
 * (offset r*nStatesSquared into pL / pR / pA).
 *
 * p        : node whose conditional likelihoods are written; p->left,
 *            p->right and p->anc are read.
 * division : index into modelSettings selecting the ModelInfo to use.
 * chain    : chain index used for the condLikeIndex / tiProbsIndex lookups
 *            and for locating this chain's block in m->tiIndex.
 *
 * NOTE(review): brace lines, some declarations (k is used below but not
 * declared in the visible lines), the statements assigning r, the updates of
 * shortCut, the dispatch among the loop nests, the pointer advances and the
 * return statement are not present in this listing.
 */
2413 int CondLikeRoot_Gen_GibbsGamma (TreeNode *p, int division, int chain)
2415 int a, b, c, i, j, r, *rateCat, shortCut, *lState=NULL,
2416 *rState=NULL, *aState=NULL, nObsStates, nStates,
2417 nStatesSquared, nGammaCats;
2418 CLFlt likeL, likeR, likeA, *clL, *clR, *clP, *clA, *pL, *pR, *pA,
2419 *tiPL, *tiPR, *tiPA;
2421 # if !defined (DEBUG_NOSHORTCUTS)
2425 /* find model settings for this division and nStates, nStatesSquared */
2426 m = &modelSettings[division];
2427 nObsStates = m->numStates;
2428 nStates = m->numModelStates;
2429 nStatesSquared = nStates * nStates;
2431 /* flip conditional likelihood space */
2432 FlipCondLikeSpace (m, chain, p->index);
2434 /* find conditional likelihood pointers */
2435 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2436 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2437 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2438 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2440 /* find transition probabilities (or calculate instead) */
2441 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2442 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2443 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2445 /* find rate category index and number of gamma categories */
/* rateCat points at this chain's block of m->numChars entries inside m->tiIndex */
2446 rateCat = m->tiIndex + chain * m->numChars;
2447 nGammaCats = m->numGammaCats;
2449 /* find likelihoods of site patterns for left branch if terminal */
2451 # if !defined (DEBUG_NOSHORTCUTS)
2452 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2455 lState = m->termState[p->left->index];
/* Table layout per gamma category: the first pass copies, for each of the
   nObsStates observed states i, the nStates matrix entries found at
   j = i, i+nStates, ...; the pass over b adds the entries at column offsets
   b*nObsStates; finally nStates entries of 1.0 are appended.
   NOTE(review): the += pass presumably revisits the entries written by the
   first pass, but the statement resetting a is not visible here - confirm. */
2457 for (k=a=0; k<nGammaCats; k++)
2460 for (i=0; i<nObsStates; i++)
2461 for (j=i; j<nStatesSquared; j+=nStates)
2462 preLikeL[a++] = tiPL[j];
2463 for (b=1; b<nStates/nObsStates; b++)
2466 for (i=0; i<nObsStates; i++)
2468 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2469 preLikeL[a++] += tiPL[j];
2473 for (i=0; i<nStates; i++)
2474 preLikeL[a++] = 1.0;
2475 tiPL += nStatesSquared;
2479 /* find likelihoods of site patterns for right branch if terminal */
2480 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2483 rState = m->termState[p->right->index];
/* same table layout as preLikeL, built from the right branch matrices */
2485 for (k=a=0; k<nGammaCats; k++)
2488 for (i=0; i<nObsStates; i++)
2489 for (j=i; j<nStatesSquared; j+=nStates)
2490 preLikeR[a++] = tiPR[j];
2491 for (b=1; b<nStates/nObsStates; b++)
2494 for (i=0; i<nObsStates; i++)
2496 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2497 preLikeR[a++] += tiPR[j];
2501 for (i=0; i<nStates; i++)
2502 preLikeR[a++] = 1.0;
2503 tiPR += nStatesSquared;
2507 /* find likelihoods of site patterns for anc branch, always terminal */
/* NOTE(review): what happens in the YES branch, and whether the lines below sit in an else branch, is not visible here */
2508 if (m->isPartAmbig[p->anc->index] == YES)
2514 aState = m->termState[p->anc->index];
/* same table layout as preLikeL, built from pA */
2516 for (k=a=0; k<nGammaCats; k++)
2519 for (i=0; i<nObsStates; i++)
2520 for (j=i; j<nStatesSquared; j+=nStates)
2521 preLikeA[a++] = tiPA[j];
2522 for (b=1; b<nStates/nObsStates; b++)
2525 for (i=0; i<nObsStates; i++)
2527 for (j=i+b*nObsStates; j<nStatesSquared; j+=nStates)
2528 preLikeA[a++] += tiPA[j];
2532 for (i=0; i<nStates; i++)
2533 preLikeA[a++] = 1.0;
2534 tiPA += nStatesSquared;
/* NOTE(review): the table lookups in the loop nests below use a per-category
   stride of nStatesSquared+nStates, whereas the visible fill loops above write
   nObsStates*nStates entries in the first pass plus nStates entries of 1.0 per
   category. The two strides agree only when nObsStates == nStates; confirm
   that this routine is never used when they differ. */
/* Loop nest 1: all three factors are summed from conditional likelihoods.
   NOTE(review): r is presumably taken from rateCat[c]; the assignment is not visible here. */
2544 for (c=0; c<m->numChars; c++)
2549 tiPL = pL + r*nStatesSquared;
2550 tiPR = pR + r*nStatesSquared;
2551 tiPA = pA + r*nStatesSquared;
2552 for (i=0; i<nStates; i++)
2554 likeL = likeR = likeA = 0.0;
2555 for (j=0; j<nStates; j++)
2557 likeL += (*tiPL++) * clL[j];
2558 likeR += (*tiPR++) * clR[j];
2559 likeA += (*tiPA++) * clA[j];
2561 *(clP++) = likeL * likeR * likeA;
/* Loop nest 2: ancestral factor from preLikeA; left and right factors summed. */
2573 for (c=0; c<m->numChars; c++)
2578 tiPL = pL + r*nStatesSquared;
2579 tiPR = pR + r*nStatesSquared;
2580 a = aState[c] + r*(nStatesSquared+nStates);
2581 for (i=0; i<nStates; i++)
2583 likeL = likeR = 0.0;
2584 for (j=0; j<nStates; j++)
2586 likeL += (*tiPL++) * clL[j];
2587 likeR += (*tiPR++) * clR[j];
2589 *(clP++) = likeL * likeR * preLikeA[a++];
/* Loop nest 3: left and ancestral factors from tables; right factor summed. */
2599 for (c=0; c<m->numChars; c++)
2604 tiPR = pR + r*nStatesSquared;
2605 a = lState[c] + r*(nStatesSquared+nStates);
2606 b = aState[c] + r*(nStatesSquared+nStates);
2607 for (i=0; i<nStates; i++)
2610 for (j=0; j<nStates; j++)
2612 likeR += (*tiPR++) * clR[j];
2614 *(clP++) = preLikeL[a++] * likeR * preLikeA[b++];
/* Loop nest 4: right and ancestral factors from tables; left factor summed. */
2623 for (c=0; c<m->numChars; c++)
2628 tiPL = pL + r*nStatesSquared;
2629 a = rState[c] + r*(nStatesSquared+nStates);
2630 b = aState[c] + r*(nStatesSquared+nStates);
2631 for (i=0; i<nStates; i++)
2634 for (j=0; j<nStates; j++)
2636 likeL += (*tiPL++) * clL[j];
2638 *(clP++) = likeL * preLikeR[a++] * preLikeA[b++];
2652 /*----------------------------------------------------------------
2654 | CondLikeRoot_NUC4: 4by4 nucleotide model with or without rate
2657 -----------------------------------------------------------------*/
2658 int CondLikeRoot_NUC4 (TreeNode *p, int division, int chain)
2660 int a, c, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL;
2661 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
2664 m = &modelSettings[division];
2666 /* flip state of node so that we are not overwriting old cond likes */
2667 FlipCondLikeSpace (m, chain, p->index);
2669 /* find conditional likelihood pointers */
2670 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2671 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2672 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2673 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2675 /* find transition probabilities (or calculate instead) */
2676 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2677 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
2678 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2680 /* find likelihoods of site patterns for left branch if terminal */
2682 # if !defined (DEBUG_NOSHORTCUTS)
2683 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2686 lState = m->termState[p->left->index];
2688 for (k=j=0; k<m->numGammaCats; k++)
2692 preLikeL[j++] = tiPL[0];
2693 preLikeL[j++] = tiPL[4];
2694 preLikeL[j++] = tiPL[8];
2695 preLikeL[j++] = tiPL[12];
2700 preLikeL[j++] = 1.0;
2705 /* find likelihoods of site patterns for right branch if terminal */
2706 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2709 rState = m->termState[p->right->index];
2711 for (k=j=0; k<m->numGammaCats; k++)
2715 preLikeR[j++] = tiPR[0];
2716 preLikeR[j++] = tiPR[4];
2717 preLikeR[j++] = tiPR[8];
2718 preLikeR[j++] = tiPR[12];
2723 preLikeR[j++] = 1.0;
2728 /* find likelihoods of site patterns for anc branch, always terminal */
2729 if (m->isPartAmbig[p->anc->index] == YES)
2735 aState = m->termState[p->anc->index];
2737 for (k=j=0; k<m->numGammaCats; k++)
2741 preLikeA[j++] = tiPA[0];
2742 preLikeA[j++] = tiPA[4];
2743 preLikeA[j++] = tiPA[8];
2744 preLikeA[j++] = tiPA[12];
2749 preLikeA[j++] = 1.0;
2763 for (k=h=0; k<m->numGammaCats; k++)
2765 for (c=0; c<m->numChars; c++)
2767 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
2768 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
2769 *(tiPA[AA]*clA[A] + tiPA[AC]*clA[C] + tiPA[AG]*clA[G] + tiPA[AT]*clA[T]);
2770 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
2771 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
2772 *(tiPA[CA]*clA[A] + tiPA[CC]*clA[C] + tiPA[CG]*clA[G] + tiPA[CT]*clA[T]);
2773 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
2774 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
2775 *(tiPA[GA]*clA[A] + tiPA[GC]*clA[C] + tiPA[GG]*clA[G] + tiPA[GT]*clA[T]);
2776 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
2777 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
2778 *(tiPA[TA]*clA[A] + tiPA[TC]*clA[C] + tiPA[TG]*clA[G] + tiPA[TT]*clA[T]);
2792 for (k=h=0; k<m->numGammaCats; k++)
2794 for (c=0; c<m->numChars; c++)
2796 i = aState[c] + k*20;
2797 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
2798 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
2800 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
2801 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
2803 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
2804 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
2806 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
2807 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
2819 for (k=h=0; k<m->numGammaCats; k++)
2821 for (c=0; c<m->numChars; c++)
2823 i = lState[c] + k*20;
2824 j = aState[c] + k*20;
2825 clP[h++] = (tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
2826 *preLikeL[i++]*preLikeA[j++];
2827 clP[h++] = (tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
2828 *preLikeL[i++]*preLikeA[j++];
2829 clP[h++] = (tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
2830 *preLikeL[i++]*preLikeA[j++];
2831 clP[h++] = (tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
2832 *preLikeL[i++]*preLikeA[j++];
2841 for (k=h=0; k<m->numGammaCats; k++)
2843 for (c=0; c<m->numChars; c++)
2845 i = rState[c] + k*20;
2846 j = aState[c] + k*20;
2847 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
2848 *preLikeR[i++]*preLikeA[j++];
2849 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
2850 *preLikeR[i++]*preLikeA[j++];
2851 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
2852 *preLikeR[i++]*preLikeA[j++];
2853 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
2854 *preLikeR[i++]*preLikeA[j++];
2862 for (k=h=0; k<m->numGammaCats; k++)
2864 for (c=0; c<m->numChars; c++)
2866 a = lState[c] + k*20;
2867 i = rState[c] + k*20;
2868 j = aState[c] + k*20;
2869 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
2870 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
2871 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
2872 clP[h++] = preLikeL[a++]*preLikeR[i++]*preLikeA[j++];
2882 /*----------------------------------------------------------------
2884 | CondLikeRoot_NUC4_GibbsGamma: 4by4 nucleotide model with rate
2885 | variation approximated by Gibbs sampling from gamma
|
| Fills the conditional likelihoods of node p (four values per
| character, written through clP) by combining contributions from
| three neighbours: p->left, p->right and p->anc.
|
| p        : node to update; its left, right and anc nodes are read
| division : index into modelSettings
| chain    : chain index used to select the cond like and transition
|            probability arrays and the row of m->tiIndex
|
| NOTE(review): the int result is presumably an error code such as
| NO_ERROR -- confirm against the callers.
2887 -----------------------------------------------------------------*/
2888 int CondLikeRoot_NUC4_GibbsGamma (TreeNode *p, int division, int chain)
2890 int c, h, i, j, r, *rateCat, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
2892 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
2894 # if !defined (DEBUG_NOSHORTCUTS)
2898 m = &modelSettings[division];
2900 /* flip conditional likelihood space */
2901 FlipCondLikeSpace (m, chain, p->index);
2903 /* find conditional likelihood pointers */
2904 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
2905 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
2906 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
2907 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
2909 /* find transition probabilities (or calculate instead) */
2910 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
2911 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
/* the transition probabilities used for the ancestor term are looked up under p's own index */
2912 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
2914 /* find rate category index and number of gamma categories */
/* rateCat points at this chain's row of m->tiIndex (m->numChars entries per chain) */
2915 rateCat = m->tiIndex + chain * m->numChars;
2916 nGammaCats = m->numGammaCats;
2918 /* find likelihoods of site patterns for left branch if terminal */
2920 # if !defined (DEBUG_NOSHORTCUTS)
/* a left child without descendants and without partial ambiguity gets a lookup table (preLikeL) indexed via lState */
2921 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
2924 lState = m->termState[p->left->index];
2926 for (k=j=0; k<nGammaCats; k++)
/* copy every fourth entry of tiPL (offsets 0, 4, 8, 12) into consecutive table slots */
2930 preLikeL[j++] = tiPL[0];
2931 preLikeL[j++] = tiPL[4];
2932 preLikeL[j++] = tiPL[8];
2933 preLikeL[j++] = tiPL[12];
/* table slots set to 1.0 -- presumably looked up for ambiguous or missing states; confirm */
2938 preLikeL[j++] = 1.0;
2943 /* find likelihoods of site patterns for right branch if terminal */
2944 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
2947 rState = m->termState[p->right->index];
2949 for (k=j=0; k<nGammaCats; k++)
2953 preLikeR[j++] = tiPR[0];
2954 preLikeR[j++] = tiPR[4];
2955 preLikeR[j++] = tiPR[8];
2956 preLikeR[j++] = tiPR[12];
2961 preLikeR[j++] = 1.0;
2966 /* find likelihoods of site patterns for anc branch, always terminal */
2967 if (m->isPartAmbig[p->anc->index] == YES)
2973 aState = m->termState[p->anc->index];
2975 for (k=j=0; k<nGammaCats; k++)
2979 preLikeA[j++] = tiPA[0];
2980 preLikeA[j++] = tiPA[4];
2981 preLikeA[j++] = tiPA[8];
2982 preLikeA[j++] = tiPA[12];
2987 preLikeA[j++] = 1.0;
/* variant 1: left, right and ancestor terms are all full matrix-vector products over A, C, G, T */
2998 for (c=h=0; c<m->numChars; c++)
3006 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
3007 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
3008 *(tiPA[AA]*clA[A] + tiPA[AC]*clA[C] + tiPA[AG]*clA[G] + tiPA[AT]*clA[T]);
3009 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
3010 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
3011 *(tiPA[CA]*clA[A] + tiPA[CC]*clA[C] + tiPA[CG]*clA[G] + tiPA[CT]*clA[T]);
3012 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
3013 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
3014 *(tiPA[GA]*clA[A] + tiPA[GC]*clA[C] + tiPA[GG]*clA[G] + tiPA[GT]*clA[T]);
3015 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
3016 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
3017 *(tiPA[TA]*clA[A] + tiPA[TC]*clA[C] + tiPA[TG]*clA[G] + tiPA[TT]*clA[T]);
/* variant 2: left and right use matrix-vector products; i is an offset built from the ancestor's state aState[c] and r */
/* NOTE(review): the stride of 20 presumably equals 16 copied ti entries plus 4 ones per rate category -- confirm */
3029 for (c=h=0; c<m->numChars; c++)
3036 i = aState[c] + r * 20;
3037 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
3038 *(tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
3040 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
3041 *(tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
3043 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
3044 *(tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
3046 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
3047 *(tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
/* variant 3: left and ancestor factors are read from preLikeL/preLikeA; only the right child needs a matrix-vector product */
3058 for (c=h=0; c<m->numChars; c++)
3064 i = lState[c] + r * 20;
3065 j = aState[c] + r * 20;
3066 clP[h++] = (tiPR[AA]*clR[A] + tiPR[AC]*clR[C] + tiPR[AG]*clR[G] + tiPR[AT]*clR[T])
3067 *preLikeL[i++]*preLikeA[j++];
3068 clP[h++] = (tiPR[CA]*clR[A] + tiPR[CC]*clR[C] + tiPR[CG]*clR[G] + tiPR[CT]*clR[T])
3069 *preLikeL[i++]*preLikeA[j++];
3070 clP[h++] = (tiPR[GA]*clR[A] + tiPR[GC]*clR[C] + tiPR[GG]*clR[G] + tiPR[GT]*clR[T])
3071 *preLikeL[i++]*preLikeA[j++];
3072 clP[h++] = (tiPR[TA]*clR[A] + tiPR[TC]*clR[C] + tiPR[TG]*clR[G] + tiPR[TT]*clR[T])
3073 *preLikeL[i++]*preLikeA[j++];
/* variant 4: right and ancestor factors are read from preLikeR/preLikeA; only the left child needs a matrix-vector product */
3082 for (c=h=0; c<m->numChars; c++)
3088 i = rState[c] + r * 20;
3089 j = aState[c] + r * 20;
3090 clP[h++] = (tiPL[AA]*clL[A] + tiPL[AC]*clL[C] + tiPL[AG]*clL[G] + tiPL[AT]*clL[T])
3091 *preLikeR[i++]*preLikeA[j++];
3092 clP[h++] = (tiPL[CA]*clL[A] + tiPL[CC]*clL[C] + tiPL[CG]*clL[G] + tiPL[CT]*clL[T])
3093 *preLikeR[i++]*preLikeA[j++];
3094 clP[h++] = (tiPL[GA]*clL[A] + tiPL[GC]*clL[C] + tiPL[GG]*clL[G] + tiPL[GT]*clL[T])
3095 *preLikeR[i++]*preLikeA[j++];
3096 clP[h++] = (tiPL[TA]*clL[A] + tiPL[TC]*clL[C] + tiPL[TG]*clL[G] + tiPL[TT]*clL[T])
3097 *preLikeR[i++]*preLikeA[j++];
3110 #if defined (SSE_ENABLED)
3111 /*----------------------------------------------------------------
3113 | CondLikeRoot_NUC4_SSE: 4by4 nucleotide model with or without rate
3114 | variation using SSE instructions
|
| Fills the conditional likelihoods of node p from three neighbours
| (p->left, p->right, p->anc). The cond like arrays are accessed as
| __m128 vectors, so every multiply/add below operates on four
| floats at once; four result vectors (states A, C, G, T) are stored
| through clP per inner-loop iteration.
|
| p        : node to update; its left, right and anc nodes are read
| division : index into modelSettings
| chain    : chain index used to select the cond like and transition
|            probability arrays
|
| NOTE(review): presumably each __m128 holds one state for four
| consecutive characters, and the int result is an error code such
| as NO_ERROR -- confirm.
3116 -----------------------------------------------------------------*/
3117 int CondLikeRoot_NUC4_SSE (TreeNode *p, int division, int chain)
3120 CLFlt *pL, *pR, *pA, *tiPL, *tiPR, *tiPA;
3121 __m128 *clL, *clR, *clP, *clA;
3122 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9;
3125 m = &modelSettings[division];
3127 /* flip state of node so that we are not overwriting old cond likes */
3128 FlipCondLikeSpace (m, chain, p->index);
3130 /* find conditional likelihood pointers */
3131 clL = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->left->index ]];
3132 clR = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->right->index]];
3133 clP = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index ]];
3134 clA = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
3136 /* find transition probabilities */
3137 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
3138 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
/* the transition probabilities used for the ancestor term are looked up under p's own index */
3139 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
3144 for (k=0; k<m->numGammaCats; k++)
3146 for (c=0; c<m->numSSEChars; c++)
/* parent state A: _mm_load1_ps broadcasts one transition probability to all four lanes; */
/* m7, m8 and m9 accumulate the left, right and ancestor sums over A, C, G, T */
3148 m1 = _mm_load1_ps (&tiPL[AA]);
3149 m2 = _mm_load1_ps (&tiPR[AA]);
3150 m3 = _mm_load1_ps (&tiPA[AA]);
3151 m7 = _mm_mul_ps (m1, clL[A]);
3152 m8 = _mm_mul_ps (m2, clR[A]);
3153 m9 = _mm_mul_ps (m3, clA[A]);
3155 m1 = _mm_load1_ps (&tiPL[AC]);
3156 m2 = _mm_load1_ps (&tiPR[AC]);
3157 m3 = _mm_load1_ps (&tiPA[AC]);
3158 m4 = _mm_mul_ps (m1, clL[C]);
3159 m5 = _mm_mul_ps (m2, clR[C]);
3160 m6 = _mm_mul_ps (m3, clA[C]);
3161 m7 = _mm_add_ps (m4, m7);
3162 m8 = _mm_add_ps (m5, m8);
3163 m9 = _mm_add_ps (m6, m9);
3165 m1 = _mm_load1_ps (&tiPL[AG]);
3166 m2 = _mm_load1_ps (&tiPR[AG]);
3167 m3 = _mm_load1_ps (&tiPA[AG]);
3168 m4 = _mm_mul_ps (m1, clL[G]);
3169 m5 = _mm_mul_ps (m2, clR[G]);
3170 m6 = _mm_mul_ps (m3, clA[G]);
3171 m7 = _mm_add_ps (m4, m7);
3172 m8 = _mm_add_ps (m5, m8);
3173 m9 = _mm_add_ps (m6, m9);
3175 m1 = _mm_load1_ps (&tiPL[AT]);
3176 m2 = _mm_load1_ps (&tiPR[AT]);
3177 m3 = _mm_load1_ps (&tiPA[AT]);
3178 m4 = _mm_mul_ps (m1, clL[T]);
3179 m5 = _mm_mul_ps (m2, clR[T]);
3180 m6 = _mm_mul_ps (m3, clA[T]);
3181 m7 = _mm_add_ps (m4, m7);
3182 m8 = _mm_add_ps (m5, m8);
3183 m9 = _mm_add_ps (m6, m9);
/* store left * right * ancestor for parent state A */
3185 m7 = _mm_mul_ps (m7, m8);
3186 *clP++ = _mm_mul_ps (m7, m9);
/* parent state C: same accumulation with the CA, CC, CG, CT entries */
3188 m1 = _mm_load1_ps (&tiPL[CA]);
3189 m2 = _mm_load1_ps (&tiPR[CA]);
3190 m3 = _mm_load1_ps (&tiPA[CA]);
3191 m7 = _mm_mul_ps (m1, clL[A]);
3192 m8 = _mm_mul_ps (m2, clR[A]);
3193 m9 = _mm_mul_ps (m3, clA[A]);
3195 m1 = _mm_load1_ps (&tiPL[CC]);
3196 m2 = _mm_load1_ps (&tiPR[CC]);
3197 m3 = _mm_load1_ps (&tiPA[CC]);
3198 m4 = _mm_mul_ps (m1, clL[C]);
3199 m5 = _mm_mul_ps (m2, clR[C]);
3200 m6 = _mm_mul_ps (m3, clA[C]);
3201 m7 = _mm_add_ps (m4, m7);
3202 m8 = _mm_add_ps (m5, m8);
3203 m9 = _mm_add_ps (m6, m9);
3205 m1 = _mm_load1_ps (&tiPL[CG]);
3206 m2 = _mm_load1_ps (&tiPR[CG]);
3207 m3 = _mm_load1_ps (&tiPA[CG]);
3208 m4 = _mm_mul_ps (m1, clL[G]);
3209 m5 = _mm_mul_ps (m2, clR[G]);
3210 m6 = _mm_mul_ps (m3, clA[G]);
3211 m7 = _mm_add_ps (m4, m7);
3212 m8 = _mm_add_ps (m5, m8);
3213 m9 = _mm_add_ps (m6, m9);
3215 m1 = _mm_load1_ps (&tiPL[CT]);
3216 m2 = _mm_load1_ps (&tiPR[CT]);
3217 m3 = _mm_load1_ps (&tiPA[CT]);
3218 m4 = _mm_mul_ps (m1, clL[T]);
3219 m5 = _mm_mul_ps (m2, clR[T]);
3220 m6 = _mm_mul_ps (m3, clA[T]);
3221 m7 = _mm_add_ps (m4, m7);
3222 m8 = _mm_add_ps (m5, m8);
3223 m9 = _mm_add_ps (m6, m9);
/* store left * right * ancestor for parent state C */
3225 m7 = _mm_mul_ps (m7, m8);
3226 *clP++ = _mm_mul_ps (m7, m9);
/* parent state G: same accumulation with the GA, GC, GG, GT entries */
3228 m1 = _mm_load1_ps (&tiPL[GA]);
3229 m2 = _mm_load1_ps (&tiPR[GA]);
3230 m3 = _mm_load1_ps (&tiPA[GA]);
3231 m7 = _mm_mul_ps (m1, clL[A]);
3232 m8 = _mm_mul_ps (m2, clR[A]);
3233 m9 = _mm_mul_ps (m3, clA[A]);
3235 m1 = _mm_load1_ps (&tiPL[GC]);
3236 m2 = _mm_load1_ps (&tiPR[GC]);
3237 m3 = _mm_load1_ps (&tiPA[GC]);
3238 m4 = _mm_mul_ps (m1, clL[C]);
3239 m5 = _mm_mul_ps (m2, clR[C]);
3240 m6 = _mm_mul_ps (m3, clA[C]);
3241 m7 = _mm_add_ps (m4, m7);
3242 m8 = _mm_add_ps (m5, m8);
3243 m9 = _mm_add_ps (m6, m9);
3245 m1 = _mm_load1_ps (&tiPL[GG]);
3246 m2 = _mm_load1_ps (&tiPR[GG]);
3247 m3 = _mm_load1_ps (&tiPA[GG]);
3248 m4 = _mm_mul_ps (m1, clL[G]);
3249 m5 = _mm_mul_ps (m2, clR[G]);
3250 m6 = _mm_mul_ps (m3, clA[G]);
3251 m7 = _mm_add_ps (m4, m7);
3252 m8 = _mm_add_ps (m5, m8);
3253 m9 = _mm_add_ps (m6, m9);
3255 m1 = _mm_load1_ps (&tiPL[GT]);
3256 m2 = _mm_load1_ps (&tiPR[GT]);
3257 m3 = _mm_load1_ps (&tiPA[GT]);
3258 m4 = _mm_mul_ps (m1, clL[T]);
3259 m5 = _mm_mul_ps (m2, clR[T]);
3260 m6 = _mm_mul_ps (m3, clA[T]);
3261 m7 = _mm_add_ps (m4, m7);
3262 m8 = _mm_add_ps (m5, m8);
3263 m9 = _mm_add_ps (m6, m9);
/* store left * right * ancestor for parent state G */
3265 m7 = _mm_mul_ps (m7, m8);
3266 *clP++ = _mm_mul_ps (m7, m9);
/* parent state T: same accumulation with the TA, TC, TG, TT entries */
3268 m1 = _mm_load1_ps (&tiPL[TA]);
3269 m2 = _mm_load1_ps (&tiPR[TA]);
3270 m3 = _mm_load1_ps (&tiPA[TA]);
3271 m7 = _mm_mul_ps (m1, clL[A]);
3272 m8 = _mm_mul_ps (m2, clR[A]);
3273 m9 = _mm_mul_ps (m3, clA[A]);
3275 m1 = _mm_load1_ps (&tiPL[TC]);
3276 m2 = _mm_load1_ps (&tiPR[TC]);
3277 m3 = _mm_load1_ps (&tiPA[TC]);
3278 m4 = _mm_mul_ps (m1, clL[C]);
3279 m5 = _mm_mul_ps (m2, clR[C]);
3280 m6 = _mm_mul_ps (m3, clA[C]);
3281 m7 = _mm_add_ps (m4, m7);
3282 m8 = _mm_add_ps (m5, m8);
3283 m9 = _mm_add_ps (m6, m9);
3285 m1 = _mm_load1_ps (&tiPL[TG]);
3286 m2 = _mm_load1_ps (&tiPR[TG]);
3287 m3 = _mm_load1_ps (&tiPA[TG]);
3288 m4 = _mm_mul_ps (m1, clL[G]);
3289 m5 = _mm_mul_ps (m2, clR[G]);
3290 m6 = _mm_mul_ps (m3, clA[G]);
3291 m7 = _mm_add_ps (m4, m7);
3292 m8 = _mm_add_ps (m5, m8);
3293 m9 = _mm_add_ps (m6, m9);
3295 m1 = _mm_load1_ps (&tiPL[TT]);
3296 m2 = _mm_load1_ps (&tiPR[TT]);
3297 m3 = _mm_load1_ps (&tiPA[TT]);
3298 m4 = _mm_mul_ps (m1, clL[T]);
3299 m5 = _mm_mul_ps (m2, clR[T]);
3300 m6 = _mm_mul_ps (m3, clA[T]);
3301 m7 = _mm_add_ps (m4, m7);
3302 m8 = _mm_add_ps (m5, m8);
3303 m9 = _mm_add_ps (m6, m9);
/* store left * right * ancestor for parent state T */
3305 m7 = _mm_mul_ps (m7, m8);
3306 *clP++ = _mm_mul_ps (m7, m9);
3322 #if !defined (SSE_ENABLED) || 1
3323 /*----------------------------------------------------------------
3325 | CondLikeRoot_NY98: codon model with omega variation
|
| Fills the conditional likelihoods of node p from three neighbours
| (p->left, p->right, p->anc), looping over m->numOmegaCats
| categories and m->numChars characters and writing nStates values
| per character through clP. Neighbours that qualify get a lookup
| table (preLikeL/R/A) instead of a matrix-vector product.
|
| p        : node to update; its left, right and anc nodes are read
| division : index into modelSettings
| chain    : chain index used to select the cond like and transition
|            probability arrays
|
| NOTE(review): the int result is presumably an error code such as
| NO_ERROR -- confirm against the callers.
3327 -----------------------------------------------------------------*/
3328 int CondLikeRoot_NY98 (TreeNode *p, int division, int chain)
3330 int a, b, c, d, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
3331 nStates, nStatesSquared;
3332 CLFlt likeL, likeR, likeA, *clL, *clR, *clP, *clA, *pL, *pR, *pA,
3333 *tiPL, *tiPR, *tiPA;
3336 /* find model settings for this division and nStates, nStatesSquared */
3337 m = &modelSettings[division];
3338 nStates = m->numModelStates;
3339 nStatesSquared = nStates * nStates;
3341 /* flip state of node so that we are not overwriting old cond likes */
3342 FlipCondLikeSpace (m, chain, p->index);
3344 /* find conditional likelihood pointers */
3345 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
3346 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
3347 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
3348 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
3350 /* find transition probabilities (or calculate instead) */
3351 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
3352 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
/* the transition probabilities used for the ancestor term are looked up under p's own index */
3353 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
3355 /* find likelihoods of site patterns for left branch if terminal */
3357 # if !defined (DEBUG_NOSHORTCUTS)
3358 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
3361 lState = m->termState[p->left->index];
/* per omega category the table receives nStatesSquared entries followed by nStates ones */
3363 for (k=a=0; k<m->numOmegaCats; k++)
/* tiPL is walked with stride nStates starting at offset i, so the matrix is stored in transposed order */
3365 for (i=0; i<nStates; i++)
3366 for (j=i; j<nStatesSquared; j+=nStates)
3367 preLikeL[a++] = tiPL[j];
/* entries of 1.0 -- presumably looked up for ambiguous or missing states; confirm */
3369 for (i=0; i<nStates; i++)
3370 preLikeL[a++] = 1.0;
3371 tiPL += nStatesSquared;
3375 /* find likelihoods of site patterns for right branch if terminal */
3376 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
3379 rState = m->termState[p->right->index];
3381 for (k=a=0; k<m->numOmegaCats; k++)
3383 for (i=0; i<nStates; i++)
3384 for (j=i; j<nStatesSquared; j+=nStates)
3385 preLikeR[a++] = tiPR[j];
3387 for (i=0; i<nStates; i++)
3388 preLikeR[a++] = 1.0;
3389 tiPR += nStatesSquared;
3393 /* find likelihoods of site patterns for anc branch, always terminal */
3394 if (m->isPartAmbig[p->anc->index] == YES)
3400 aState = m->termState[p->anc->index];
3402 for (k=a=0; k<m->numOmegaCats; k++)
3404 for (i=0; i<nStates; i++)
3405 for (j=i; j<nStatesSquared; j+=nStates)
3406 preLikeA[a++] = tiPA[j];
3408 for (i=0; i<nStates; i++)
3409 preLikeA[a++] = 1.0;
3410 tiPA += nStatesSquared;
/* variant 1: left, right and ancestor terms are all matrix-vector products; h walks the matrix entries in storage order */
3423 for (k=0; k<m->numOmegaCats; k++)
3425 for (c=0; c<m->numChars; c++)
3427 for (i=h=0; i<nStates; i++)
3429 likeL = likeR = likeA = 0.0;
3430 for (j=0; j<nStates; j++)
3432 likeA += tiPA[h]*clA[j];
3433 likeL += tiPL[h]*clL[j];
3434 likeR += tiPR[h++]*clR[j];
3436 *(clP++) = likeL * likeR * likeA;
3442 tiPL += nStatesSquared;
3443 tiPR += nStatesSquared;
3444 tiPA += nStatesSquared;
/* variant 2: ancestor factor read from preLikeA; left and right use matrix-vector products */
3450 for (k=0; k<m->numOmegaCats; k++)
3452 for (c=0; c<m->numChars; c++)
/* each omega category occupies nStatesSquared+nStates table entries, matching the fill loops above */
3454 b = aState[c] + k*(nStatesSquared+nStates);
3455 for (i=h=0; i<nStates; i++)
3457 likeR = likeL = 0.0;
3458 for (j=0; j<nStates; j++)
3460 likeR += tiPR[h]*clR[j];
3461 likeL += tiPL[h++]*clL[j];
3463 *(clP++) = preLikeA[b++] * likeL * likeR;
3468 tiPR += nStatesSquared;
3469 tiPL += nStatesSquared;
/* variant 3: left and ancestor factors read from the tables; only the right child needs a matrix-vector product */
3474 for (k=0; k<m->numOmegaCats; k++)
3476 for (c=0; c<m->numChars; c++)
3478 a = lState[c] + k*(nStatesSquared+nStates);
3479 b = aState[c] + k*(nStatesSquared+nStates);
3480 for (i=h=0; i<nStates; i++)
3483 for (j=0; j<nStates; j++)
3485 likeR += tiPR[h++]*clR[j];
3487 *(clP++) = preLikeL[a++] * preLikeA[b++] * likeR;
3491 tiPR += nStatesSquared;
/* variant 4: right and ancestor factors read from the tables; only the left child needs a matrix-vector product */
3496 for (k=0; k<m->numOmegaCats; k++)
3498 for (c=0; c<m->numChars; c++)
3500 a = rState[c] + k*(nStatesSquared+nStates);
3501 b = aState[c] + k*(nStatesSquared+nStates);
3502 for (i=h=0; i<nStates; i++)
3505 for (j=0; j<nStates; j++)
3507 likeL += tiPL[h++]*clL[j];
3509 *(clP++) = preLikeR[a++] * preLikeA[b++] * likeL;
3513 tiPL += nStatesSquared;
/* variant 5: all three factors are read from the tables; no matrix-vector product is needed */
3517 for (k=0; k<m->numOmegaCats; k++)
3519 for (c=0; c<m->numChars; c++)
3521 a = rState[c] + k*(nStatesSquared+nStates);
3522 b = lState[c] + k*(nStatesSquared+nStates);
3523 d = aState[c] + k*(nStatesSquared+nStates);
3524 for (i=0; i<nStates; i++)
3526 *(clP++) = preLikeR[a++] * preLikeL[b++] * preLikeA[d++];
3538 #if defined (SSE_ENABLED)
3539 /*----------------------------------------------------------------
3541 | CondLikeRoot_NY98_SSE: codon model with omega variation
|
| SSE counterpart of the codon root calculation: the cond like
| arrays are accessed as __m128 vectors and the loops run over
| m->numSSEChars vectors instead of individual characters. Table
| lookups for the left/right/ancestor factors are gathered lane by
| lane through the preLikeLV/RV/AV pointer arrays.
|
| p        : node to update; its left, right and anc nodes are read
| division : index into modelSettings
| chain    : chain index used to select the cond like and transition
|            probability arrays
|
| NOTE(review): the int result is presumably an error code such as
| NO_ERROR -- confirm against the callers.
3543 -----------------------------------------------------------------*/
3544 int CondLikeRoot_NY98_SSE (TreeNode *p, int division, int chain)
3546 int c, c1, t, h, i, j, k, shortCut, *lState=NULL, *rState=NULL, *aState=NULL,
3547 nStates, nStatesSquared;
3548 CLFlt *pL, *pR, *pA,
3549 *tiPL, *tiPR, *tiPA;
3550 __m128 *clL, *clR, *clP, *clA;
3551 __m128 mTiPL, mTiPR, mTiPA, mL, mR, mA, mAcumL, mAcumR, mAcumA;
/* one table pointer per vector lane for the right, left and ancestor lookups */
3553 CLFlt *preLikeRV[FLOATS_PER_VEC];
3554 CLFlt *preLikeLV[FLOATS_PER_VEC];
3555 CLFlt *preLikeAV[FLOATS_PER_VEC];
3557 # if !defined (DEBUG_NOSHORTCUTS)
3562 /* find model settings for this division and nStates, nStatesSquared */
3563 m = &modelSettings[division];
3564 nStates = m->numModelStates;
3565 nStatesSquared = nStates * nStates;
3567 /* flip state of node so that we are not overwriting old cond likes */
3568 FlipCondLikeSpace (m, chain, p->index);
3570 /* find conditional likelihood pointers */
3571 clL = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->left->index ]];
3572 clR = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->right->index]];
3573 clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index ]];
3574 clA = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
3576 /* find transition probabilities (or calculate instead) */
3577 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
3578 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
/* the transition probabilities used for the ancestor term are looked up under p's own index */
3579 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
3581 /* find likelihoods of site patterns for left branch if terminal */
3583 # if !defined (DEBUG_NOSHORTCUTS)
3584 if (p->left->left == NULL && m->isPartAmbig[p->left->index] == NO)
3587 lState = m->termState[p->left->index];
/* per omega category the table receives nStatesSquared entries (tiPL read with stride nStates) followed by nStates ones */
3589 for (k=a=0; k<m->numOmegaCats; k++)
3591 for (i=0; i<nStates; i++)
3592 for (j=i; j<nStatesSquared; j+=nStates)
3593 preLikeL[a++] = tiPL[j];
3595 for (i=0; i<nStates; i++)
3596 preLikeL[a++] = 1.0;
3597 tiPL += nStatesSquared;
3601 /* find likelihoods of site patterns for right branch if terminal */
3602 if (p->right->left == NULL && m->isPartAmbig[p->right->index] == NO)
3605 rState = m->termState[p->right->index];
3607 for (k=a=0; k<m->numOmegaCats; k++)
3609 for (i=0; i<nStates; i++)
3610 for (j=i; j<nStatesSquared; j+=nStates)
3611 preLikeR[a++] = tiPR[j];
3613 for (i=0; i<nStates; i++)
3614 preLikeR[a++] = 1.0;
3615 tiPR += nStatesSquared;
3619 /* find likelihoods of site patterns for anc branch, always terminal */
3620 if (m->isPartAmbig[p->anc->index] == YES)
3626 aState = m->termState[p->anc->index];
3628 for (k=a=0; k<m->numOmegaCats; k++)
3630 for (i=0; i<nStates; i++)
3631 for (j=i; j<nStatesSquared; j+=nStates)
3632 preLikeA[a++] = tiPA[j];
3634 for (i=0; i<nStates; i++)
3635 preLikeA[a++] = 1.0;
3636 tiPA += nStatesSquared;
/* variant 1: left, right and ancestor terms are all matrix-vector products; each ti entry is broadcast to all lanes */
3648 for (k=0; k<m->numOmegaCats; k++)
3650 for (c=0; c<m->numSSEChars; c++)
3652 for (i=h=0; i<nStates; i++)
3654 mAcumL = _mm_setzero_ps();
3655 mAcumR = _mm_setzero_ps();
3656 mAcumA = _mm_setzero_ps();
3657 for (j=0; j<nStates; j++)
3659 mTiPL = _mm_load1_ps (&tiPL[h]);
3660 mTiPR = _mm_load1_ps (&tiPR[h]);
3661 mTiPA = _mm_load1_ps (&tiPA[h++]);
3662 mL = _mm_mul_ps (mTiPL, clL[j]);
3663 mR = _mm_mul_ps (mTiPR, clR[j]);
3664 mA = _mm_mul_ps (mTiPA, clA[j]);
3665 mAcumL = _mm_add_ps (mL, mAcumL);
3666 mAcumR = _mm_add_ps (mR, mAcumR);
3667 mAcumA = _mm_add_ps (mA, mAcumA);
3669 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
3670 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
3676 tiPL += nStatesSquared;
3677 tiPR += nStatesSquared;
3678 tiPA += nStatesSquared;
/* variant 2: ancestor factor gathered from preLikeA; left and right use matrix-vector products */
3684 for (k=0; k<m->numOmegaCats; k++)
3686 for (c=t=0; c<m->numSSEChars; c++)
/* t counts individual characters: one table pointer is set up per lane of the current vector */
3688 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
3690 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
3692 for (i=h=0; i<nStates; i++)
3694 assert (FLOATS_PER_VEC == 4); /* The following statement assumes that an SSE register holds exactly 4 CLFlts. */
/* _mm_set_ps takes its arguments highest lane first, so lane c1 receives the value read through preLikeAV[c1] */
3695 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
3696 mAcumL = _mm_setzero_ps();
3697 mAcumR = _mm_setzero_ps();
3698 for (j=0; j<nStates; j++)
3700 mTiPL = _mm_load1_ps (&tiPL[h]);
3701 mL = _mm_mul_ps (mTiPL, clL[j]);
3702 mAcumL = _mm_add_ps (mL, mAcumL);
3703 mTiPR = _mm_load1_ps (&tiPR[h++]);
3704 mR = _mm_mul_ps (mTiPR, clR[j]);
3705 mAcumR = _mm_add_ps (mR, mAcumR);
3707 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
3708 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
3713 tiPL += nStatesSquared;
3714 tiPR += nStatesSquared;
/* variant 3: left and ancestor factors gathered from the tables; only the right child needs a matrix-vector product */
3719 for (k=0; k<m->numOmegaCats; k++)
3721 for (c=t=0; c<m->numSSEChars; c++)
3723 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
3725 preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
3726 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
3728 for (i=h=0; i<nStates; i++)
3730 assert (FLOATS_PER_VEC == 4); /* The following statements assume that an SSE register holds exactly 4 CLFlts. */
3731 mAcumL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
3732 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
3733 mAcumR = _mm_setzero_ps();
3734 for (j=0; j<nStates; j++)
3736 mTiPR = _mm_load1_ps (&tiPR[h++]);
3737 mR = _mm_mul_ps (mTiPR, clR[j]);
3738 mAcumR = _mm_add_ps (mR, mAcumR);
3740 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
3741 *(clP++) = _mm_mul_ps (mAcumL, mAcumA);
3745 tiPR += nStatesSquared;
/* variant 4: right and ancestor factors gathered from the tables; only the left child needs a matrix-vector product */
3750 for (k=0; k<m->numOmegaCats; k++)
3752 for (c=t=0; c<m->numSSEChars; c++)
3754 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
3756 preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
3757 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
3759 for (i=h=0; i<nStates; i++)
3761 assert (FLOATS_PER_VEC == 4); /* The following statements assume that an SSE register holds exactly 4 CLFlts. */
3762 mAcumR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
3763 mAcumA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
3764 mAcumL = _mm_setzero_ps();
3765 for (j=0; j<nStates; j++)
3767 mTiPL = _mm_load1_ps (&tiPL[h++]);
3768 mL = _mm_mul_ps (mTiPL, clL[j]);
3769 mAcumL = _mm_add_ps (mL, mAcumL);
3771 mAcumL = _mm_mul_ps (mAcumL, mAcumR);
3772 *(clP++) = _mm_mul_ps (mAcumL,mAcumA);
3776 tiPL += nStatesSquared;
/* variant 5: all three factors are gathered from the tables; no matrix-vector product is needed */
3780 for (k=0; k<m->numOmegaCats; k++)
3782 for (c=t=0; c<m->numSSEChars; c++)
3784 for (c1=0; c1<FLOATS_PER_VEC; c1++,t++)
3786 preLikeRV[c1] = &preLikeR[rState[t] + k*(nStatesSquared+nStates)];
3787 preLikeLV[c1] = &preLikeL[lState[t] + k*(nStatesSquared+nStates)];
3788 preLikeAV[c1] = &preLikeA[aState[t] + k*(nStatesSquared+nStates)];
3790 for (i=0; i<nStates; i++)
3792 assert (FLOATS_PER_VEC == 4); /* The following 3 statements assume that an SSE register holds exactly 4 CLFlts. */
3793 mL = _mm_set_ps (*(preLikeLV[3]++), *(preLikeLV[2]++), *(preLikeLV[1]++), *(preLikeLV[0]++));
3794 mR = _mm_set_ps (*(preLikeRV[3]++), *(preLikeRV[2]++), *(preLikeRV[1]++), *(preLikeRV[0]++));
3795 mA = _mm_set_ps (*(preLikeAV[3]++), *(preLikeAV[2]++), *(preLikeAV[1]++), *(preLikeAV[0]++));
3796 mL = _mm_mul_ps (mL,mR);
3797 *(clP++) = _mm_mul_ps (mL,mA);
3809 /*----------------------------------------------------------------
3811 | CondLikeRoot_Std: variable number of states model
3812 | with or without rate variation
|
| Fills the conditional likelihoods of node p from three neighbours
| (p->left, p->right, p->anc). The number of states is read per
| character from m->nStates[c], and the transition matrices of a
| character are located through m->tiIndex[c].
|
| p        : node to update; its left, right and anc nodes are read
| division : index into modelSettings
| chain    : chain index used to select the cond like and transition
|            probability arrays
|
| NOTE(review): the int result is presumably an error code such as
| NO_ERROR -- confirm against the callers.
3814 -----------------------------------------------------------------*/
3815 int CondLikeRoot_Std (TreeNode *p, int division, int chain)
3817 int a, c, h, i, j, k, nStates=0, nCats=0, tmp;
3818 CLFlt *clL, *clR, *clP, *clA, *pL, *pR, *pA, *tiPL, *tiPR, *tiPA,
3819 likeL, likeR, likeA;
3822 m = &modelSettings[division];
3824 /* flip state of node so that we are not overwriting old cond likes */
3825 FlipCondLikeSpace (m, chain, p->index);
3827 /* find conditional likelihood pointers */
3828 clL = m->condLikes[m->condLikeIndex[chain][p->left->index ]];
3829 clR = m->condLikes[m->condLikeIndex[chain][p->right->index]];
3830 clP = m->condLikes[m->condLikeIndex[chain][p->index ]];
3831 clA = m->condLikes[m->condLikeIndex[chain][p->anc->index ]];
3833 /* find transition probabilities (or calculate instead) */
3834 pL = m->tiProbs[m->tiProbsIndex[chain][p->left->index ]];
3835 pR = m->tiProbs[m->tiProbsIndex[chain][p->right->index]];
/* the transition probabilities used for the ancestor term are looked up under p's own index */
3836 pA = m->tiProbs[m->tiProbsIndex[chain][p->index ]];
3838 /* calculate ancestral probabilities: outer loop over gamma rate categories */
3839 for (k=h=0; k<m->numGammaCats; k++)
3841 /* inner loop over characters; h indexes the next value written to clP */
3842 for (c=0; c<m->numChars; c++)
3844 nStates = m->nStates[c];
3846 /* the following lines ensure that nCats is 1 unless */
3847 /* the character is binary and beta categories are used */
3849 nCats = m->numBetaCats;
3853 tmp = k*nStates*nStates; /* tmp is the offset that skips the gamma cats already processed */
3854 tiPL = pL + m->tiIndex[c] + tmp;
3855 tiPR = pR + m->tiIndex[c] + tmp;
3856 tiPA = pA + m->tiIndex[c] + tmp;
3857 tmp = (m->numGammaCats-1)*2*2; /* tmp now holds the size of the block of ti matrices across all gamma cats (minus one) for a single beta category. It is used further down only if the character is binary, to jump to the next beta category */
3859 for (j=0; j<nCats;j++)
/* for each state a: dot product of the next nStates transition probabilities with each neighbour's cond likes; */
/* the ti pointers advance through the matrix as they are read */
3861 for (a=0; a<nStates; a++)
3863 likeL = likeR = likeA = 0.0;
3864 for (i=0; i<nStates; i++)
3866 likeL += *(tiPL++) * clL[i];
3867 likeR += *(tiPR++) * clR[i];
3868 likeA += *(tiPA++) * clA[i];
3870 clP[h++] = likeL * likeR * likeA;
3887 /*----------------------------------------------------------------
3889 | CondLikeUp_Bin: pull likelihoods up and calculate scaled
3890 | finals, binary model with or without rate variation
|
| Writes the final conditional likelihoods of node p into the
| scratch array selected by m->condLikeScratchIndex[p->index].
| When p->anc->anc is NULL the down-pass values of p are copied
| unchanged; otherwise they are combined with the finals already
| stored for p->anc and the transition probabilities of p.
|
| p        : node to process; p->anc (and p->anc->anc) are read
| division : index into modelSettings
| chain    : chain index used to select the down-pass cond likes
|            and transition probabilities
|
| NOTE(review): the int result is presumably an error code such as
| NO_ERROR -- confirm against the callers.
3892 -----------------------------------------------------------------*/
3893 int CondLikeUp_Bin (TreeNode *p, int division, int chain)
3896 CLFlt *clFA, *clFP, *clDP, *tiP, condLikeUp[2], sum[2];
3899 /* find model settings for this division */
3900 m = &modelSettings[division];
3902 if (p->anc->anc == NULL)
3904 /* this is the root node */
3905 /* find conditional likelihood pointers = down cond likes */
3906 /* use conditional likelihood scratch space for final cond likes */
3907 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
3908 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
/* final cond likes = down-pass cond likes: two values copied per character and gamma category */
3910 for (k=0; k<m->numGammaCats; k++)
3912 for (c=0; c<m->numChars; c++)
3914 *(clFP++) = *(clDP++);
3915 *(clFP++) = *(clDP++);
3921 /* find conditional likelihood pointers */
3922 /* use conditional likelihood scratch space for final cond likes */
3923 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
3924 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
3925 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
3927 /* find transition probabilities */
3928 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
3930 for (k=0; k<m->numGammaCats; k++)
3932 for (c=0; c<m->numChars; c++)
/* start from zero so that a state whose sum is zero contributes nothing below */
3934 condLikeUp[0] = condLikeUp[1] = 0.0;
/* sum[s]: transition probabilities tiP[2s], tiP[2s+1] weighted by the down-pass cond likes of p */
3936 sum[0] = tiP[0]*clDP[0] + tiP[1]*clDP[1];
3937 sum[1] = tiP[2]*clDP[0] + tiP[3]*clDP[1];
/* divide the ancestor's final by that sum, skipping the division when the sum is zero */
3939 if (sum[0] != 0.0) condLikeUp[0] = clFA[0] / sum[0];
3940 if (sum[1] != 0.0) condLikeUp[1] = clFA[1] / sum[1];
/* NOTE(review): these products pair condLikeUp[0], condLikeUp[1] with tiP[0], tiP[1] (then tiP[2], tiP[3]), */
/* the same index pairs used for sum[] above -- confirm this orientation of tiP is intended */
3942 *(clFP++) = (condLikeUp[0]*tiP[0] + condLikeUp[1]*tiP[1])*clDP[0];
3943 *(clFP++) = (condLikeUp[0]*tiP[2] + condLikeUp[1]*tiP[3])*clDP[1];
3956 /*----------------------------------------------------------------
3958 | CondLikeUp_Gen: pull likelihoods up and calculate scaled
3959 | finals for an interior node
|
| General version for m->numModelStates states. Writes the final
| conditional likelihoods of node p into the scratch array selected
| by m->condLikeScratchIndex[p->index]. When p->anc->anc is NULL
| the down-pass values of p are copied unchanged; otherwise they
| are combined with the finals already stored for p->anc and the
| transition probabilities of p. m->ancStateCondLikes is used as
| working space (condLikeUp).
|
| p        : node to process; p->anc (and p->anc->anc) are read
| division : index into modelSettings
| chain    : chain index used to select the down-pass cond likes
|            and transition probabilities
|
| NOTE(review): the int result is presumably an error code such as
| NO_ERROR -- confirm against the callers.
3961 -----------------------------------------------------------------*/
3962 int CondLikeUp_Gen (TreeNode *p, int division, int chain)
3964 int a, c, i, j, k, nStates, nStatesSquared, nGammaCats;
3965 CLFlt *clFA, *clFP, *clDP, *tiP, *condLikeUp, sum;
3968 /* find model settings for this division */
3969 m = &modelSettings[division];
3971 /* find number of states in the model */
3972 nStates = m->numModelStates;
3973 nStatesSquared = nStates * nStates;
3975 /* find number of gamma cats */
3976 nGammaCats = m->numGammaCats;
/* NOTE(review): nGammaCats is presumably overridden when Gibbs sampling of gamma rates is on -- confirm */
3977 if (m->gibbsGamma == YES)
3980 /* use preallocated scratch space */
3981 condLikeUp = m->ancStateCondLikes;
3983 /* calculate final states */
3984 if (p->anc->anc == NULL)
3986 /* this is the root node */
3987 /* find conditional likelihood pointers = down cond likes */
3988 /* use conditional likelihood scratch space for final cond likes */
3989 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
3990 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
3992 /* final cond likes = downpass cond likes */
3993 for (k=0; k<nGammaCats; k++)
3995 /* copy cond likes */
3996 for (c=0; c<m->numChars*nStates; c++)
3997 *(clFP++) = *(clDP++);
4002 /* find conditional likelihood pointers */
4003 /* use conditional likelihood scratch space for final cond likes */
4004 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4005 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4006 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4008 /* find transition probabilities */
4009 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4011 for (k=0; k<nGammaCats; k++)
4013 for (c=0; c<m->numChars; c++)
/* first pass: for each state a, weight the next nStates entries of tiP by the down-pass cond likes of p */
4015 for (a=j=0; a<nStates; a++)
4018 for (i=0; i<nStates; i++)
4019 sum += tiP[j++]*clDP[i];
/* NOTE(review): this statement does not assign condLikeUp[a] when sum is zero, whereas CondLikeUp_Bin */
/* zeroes its condLikeUp entries first -- confirm that a stale value cannot be carried over */
4020 if (sum != 0.0) condLikeUp[a] = clFA[a] / sum;
/* second pass: j restarts at 0, so tiP is read in the same order as in the first pass */
4023 for (a=j=0; a<nStates; a++)
4026 for (i=0; i<nStates; i++)
4028 sum += condLikeUp[i] * tiP[j++];
4030 *(clFP++) = sum * clDP[a];
/* advance to the transition matrix of the next gamma category */
4036 tiP += nStatesSquared;
4044 /*----------------------------------------------------------------
4046 | CondLikeUp_NUC4: pull likelihoods up and calculate scaled
4047 | finals for an interior node
4049 -----------------------------------------------------------------*/
/* Four-state variant: states are addressed with the A, C, G, T indices and
   the transition matrix with the AA..TT indices.
   Parameters: p is the node to process, division selects the entry of
   modelSettings, chain selects the per-chain index tables.
   Output: final conditional likelihoods of p in the scratch slot
   m->condLikes[m->condLikeScratchIndex[p->index]].
   NOTE(review): brace, declaration and return lines are not visible in this
   view of the file. */
4050 int CondLikeUp_NUC4 (TreeNode *p, int division, int chain)
4052 int c, k, nGammaCats;
4053 CLFlt *clFA, *clFP, *clDP, *tiP, condLikeUp[4], sum[4];
4056 /* find model settings for this division */
4057 m = &modelSettings[division];
4059 /* find number of gamma cats */
4060 nGammaCats = m->numGammaCats;
/* NOTE(review): the statement controlled by the gibbsGamma test below is not visible here */
4061 if (m->gibbsGamma == YES)
4064 /* calculate final states */
4065 if (p->anc->anc == NULL)
4067 /* this is the root node */
4068 /* find conditional likelihood pointers = down cond likes */
4069 /* use conditional likelihood scratch space for final cond likes */
4070 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
4071 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
4073 /* final cond likes = downpass cond likes */
4074 for (k=0; k<nGammaCats; k++)
4076 /* copy cond likes */
4077 for (c=0; c<m->numChars; c++)
/* four values (one per nucleotide state) are copied for every character */
4079 *(clFP++) = *(clDP++);
4080 *(clFP++) = *(clDP++);
4081 *(clFP++) = *(clDP++);
4082 *(clFP++) = *(clDP++);
4088 /* find conditional likelihood pointers */
4089 /* use conditional likelihood scratch space for final cond likes */
/* clFA: final values of the ancestor, clFP: final values of p (output), clDP: down-pass values of p */
4090 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4091 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4092 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4094 /* find transition probabilities */
4095 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4097 for (k=0; k<nGammaCats; k++)
4099 for (c=0; c<m->numChars; c++)
/* default of 0.0 is kept for any state whose sum below turns out to be zero */
4101 condLikeUp[A] = condLikeUp[C] = condLikeUp[G] = condLikeUp[T] = 0.0;
/* sum[X] = row X of the transition matrix dotted with the down-pass values */
4103 sum[A] = (tiP[AA]*clDP[A] + tiP[AC]*clDP[C] + tiP[AG]*clDP[G] + tiP[AT]*clDP[T]);
4104 sum[C] = (tiP[CA]*clDP[A] + tiP[CC]*clDP[C] + tiP[CG]*clDP[G] + tiP[CT]*clDP[T]);
4105 sum[G] = (tiP[GA]*clDP[A] + tiP[GC]*clDP[C] + tiP[GG]*clDP[G] + tiP[GT]*clDP[T]);
4106 sum[T] = (tiP[TA]*clDP[A] + tiP[TC]*clDP[C] + tiP[TG]*clDP[G] + tiP[TT]*clDP[T]);
/* divide the ancestor finals by the sums, skipping zero sums to avoid division by zero */
4108 if (sum[A] != 0.0) condLikeUp[A] = clFA[A] / sum[A];
4109 if (sum[C] != 0.0) condLikeUp[C] = clFA[C] / sum[C];
4110 if (sum[G] != 0.0) condLikeUp[G] = clFA[G] / sum[G];
4111 if (sum[T] != 0.0) condLikeUp[T] = clFA[T] / sum[T];
/* NOTE(review): two groups of assignments to clFP[A..T] are visible below. The
   first group indexes tiP by column (AA, CA, GA, TA), the second by row
   (AA, AC, AG, AT), and as shown the second overwrites the first. The first
   group may be disabled by comment delimiters on lines not visible here;
   confirm against the full file. */
4114 clFP[A] = (condLikeUp[A]*tiP[AA] + condLikeUp[C]*tiP[CA] + condLikeUp[G]*tiP[GA] + condLikeUp[T]*tiP[TA])*clDP[A];
4115 clFP[C] = (condLikeUp[A]*tiP[AC] + condLikeUp[C]*tiP[CC] + condLikeUp[G]*tiP[GC] + condLikeUp[T]*tiP[TC])*clDP[C];
4116 clFP[G] = (condLikeUp[A]*tiP[AG] + condLikeUp[C]*tiP[CG] + condLikeUp[G]*tiP[GG] + condLikeUp[T]*tiP[TG])*clDP[G];
4117 clFP[T] = (condLikeUp[A]*tiP[AT] + condLikeUp[C]*tiP[CT] + condLikeUp[G]*tiP[GT] + condLikeUp[T]*tiP[TT])*clDP[T];
4120 clFP[A] = (condLikeUp[A]*tiP[AA] + condLikeUp[C]*tiP[AC] + condLikeUp[G]*tiP[AG] + condLikeUp[T]*tiP[AT])*clDP[A];
4121 clFP[C] = (condLikeUp[A]*tiP[CA] + condLikeUp[C]*tiP[CC] + condLikeUp[G]*tiP[CG] + condLikeUp[T]*tiP[CT])*clDP[C];
4122 clFP[G] = (condLikeUp[A]*tiP[GA] + condLikeUp[C]*tiP[GC] + condLikeUp[G]*tiP[GG] + condLikeUp[T]*tiP[GT])*clDP[G];
4123 clFP[T] = (condLikeUp[A]*tiP[TA] + condLikeUp[C]*tiP[TC] + condLikeUp[G]*tiP[TG] + condLikeUp[T]*tiP[TT])*clDP[T];
4137 /*----------------------------------------------------------------
4139 | CondLikeUp_Std: pull likelihoods up and calculate scaled
4140 | finals for an interior node
4142 -----------------------------------------------------------------*/
/* Variant for characters with a per-character number of states
   (m->nStates[c]) and optional beta categories (m->numBetaCats).
   Parameters: p is the node to process, division selects the entry of
   modelSettings, chain selects the per-chain index tables.
   Output: final conditional likelihoods of p in the scratch slot
   m->condLikes[m->condLikeScratchIndex[p->index]].
   NOTE(review): condLikeUp is a fixed 10-element local array indexed by
   a < nStates, so this assumes m->nStates[c] never exceeds 10 - confirm.
   NOTE(review): brace, declaration, else and return lines are not visible in
   this view of the file. */
4143 int CondLikeUp_Std (TreeNode *p, int division, int chain)
4145 int a, c, i, j, k, t, nStates, nCats, coppySize,tmp;
4146 CLFlt *clFA, *clFP, *clDP, *pA, *tiP, condLikeUp[10], sum;
4149 /* find model settings for this division */
4150 m = &modelSettings[division];
4152 /* calculate final states */
4153 if (p->anc->anc == NULL)
4155 /* this is the root node */
4156 /* find conditional likelihood pointers = down cond likes */
4157 /* use conditional likelihood scratch space for final cond likes */
4158 clDP = m->condLikes[m->condLikeIndex[chain][p->index]];
4159 clFP = m->condLikes[m->condLikeScratchIndex[p->index]];
4162 /* final cond likes = downpass cond likes */
/* coppySize accumulates nCats*nStates over all characters; its initialisation is not visible here */
4163 for (c=0; c<m->numChars; c++)
4165 /* calculate nStates and nCats */
4166 nStates = m->nStates[c];
4168 /* the following lines ensure that nCats is 1 unless */
4169 /* the character is binary and beta categories are used */
4171 nCats = m->numBetaCats;
4175 coppySize+=nCats*nStates;
4178 /* finally multiply with the gamma cats */
4179 coppySize *= m->numGammaCats;
4181 /* copy cond likes */
4182 for (k=0; k<coppySize; k++)
4183 *(clFP++) = *(clDP++);
4187 /* find conditional likelihood pointers */
4188 /* use conditional likelihood scratch space for final cond likes */
/* clFA: final values of the ancestor, clFP: final values of p (output), clDP: down-pass values of p */
4189 clFA = m->condLikes[m->condLikeScratchIndex[p->anc->index]];
4190 clFP = m->condLikes[m->condLikeScratchIndex[p->index ]];
4191 clDP = m->condLikes[m->condLikeIndex[chain][p->index ]];
4193 /* find transition probabilities */
/* pA is the base of the transition probabilities for this node; per-character offsets are added below */
4194 pA = m->tiProbs[m->tiProbsIndex[chain][p->index]];
4196 for (k=0; k<m->numGammaCats; k++)
4198 for (c=0; c<m->numChars; c++)
4201 /* calculate nStates and nCats */
4202 nStates = m->nStates[c];
4204 /* the following lines ensure that nCats is 1 unless */
4205 /* the character is binary and beta categories are used */
4207 nCats = m->numBetaCats;
4211 tmp = k*nStates*nStates; /* tmp contains offset to skip gamma cats that already processed*/
/* transition matrix for character c and gamma category k */
4212 tiP = pA + m->tiIndex[c] + tmp;
4213 tmp = (m->numGammaCats-1)*2*2; /* tmp contains size of block of tpi matrices across all gamma cats (minus one) for single beta category. Further used only if character is binary to jump to next beta category */
4215 /* finally multiply with the gamma cats */
4216 //nCats *= m->numGammaCats;
4218 /* now calculate the final cond likes */
4219 for (t=0; t<nCats; t++)
/* first pass: for each state a, sum accumulates tiP[j]*clDP[i] while j walks tiP linearly */
4221 for (a=j=0; a<nStates; a++)
4224 for (i=0; i<nStates; i++)
4225 sum += tiP[j++]*clDP[i];
/* NOTE(review): the condition choosing between the next two assignments is on lines not visible here */
4227 condLikeUp[a] = 0.0; /* we lost the conditional likelihood in the downpass (can occur in gamma model) */
4229 condLikeUp[a] = clFA[a] / sum;
/* second pass: combine condLikeUp with tiP again and scale by the down-pass value of state a */
4232 for (a=j=0; a<nStates; a++)
4235 for (i=0; i<nStates; i++)
4237 sum += condLikeUp[i] * tiP[j++];
4239 clFP[a] = sum * clDP[a];
4255 /*----------------------------------------------------------------
4257 | CondLikeScaler_Gen: general n-state model with or without rate
4260 -----------------------------------------------------------------*/
/* Rescales the conditional likelihoods stored for node p.
   For each character c the values clP[k][n] are compared against scaler over
   all gamma categories k and states n, every value is divided by scaler, and
   the log of the scaler is stored in the node scaler scP[c] and added to the
   site scaler lnScaler[c]. On completion m->scalersSet[chain][p->index] is
   set to YES.
   When FAST_LOG is defined the scaler is derived through frexp and the
   scalerValue/logValue lookup tables instead of calling log.
   Parameters: p is the node (asserted to have scalerNode == YES), division
   selects the entry of modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, preprocessor else/endif and return lines
   are not visible in this view of the file. */
4261 int CondLikeScaler_Gen (TreeNode *p, int division, int chain)
4263 int c, k, n, nStates;
4264 CLFlt scaler, **clP, *clPtr, *scP, *lnScaler;
4266 # if defined (FAST_LOG)
4270 assert (p->scalerNode == YES);
4272 m = &modelSettings[division];
4273 nStates = m->numModelStates;
4275 /* find conditional likelihood pointers */
4276 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one gamma category (numChars * numModelStates values) per iteration */
4278 for (k=0; k<m->numGammaCats; k++)
4281 clPtr += m->numChars * m->numModelStates;
4284 /* find node scalers */
4285 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4287 /* find site scalers */
4288 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4291 for (c=0; c<m->numChars; c++)
/* search across categories and states for values exceeding the current scaler */
4294 for (k=0; k<m->numGammaCats; k++)
4296 for (n=0; n<nStates; n++)
4298 if (clP[k][n] > scaler)
4303 # if defined (FAST_LOG)
/* frexp writes the binary exponent of scaler to index, which selects the tabulated scaler */
4304 frexp (scaler, &index);
4306 scaler = scalerValue[index];
/* divide every value of this character by the scaler */
4308 for (k=0; k<m->numGammaCats; k++)
4310 for (n=0; n<nStates; n++)
4311 clP[k][n] /= scaler;
4315 # if defined (FAST_LOG)
4316 scP[c] = logValue[index]; /* store node scaler */
4317 lnScaler[c] += scP[c]; /* add into tree scaler */
/* the two lines below are the log()-based alternative to the table lookup above */
4319 scP[c] = (CLFlt) log (scaler); /* store node scaler */
4320 lnScaler[c] += scP[c]; /* add into tree scaler */
4324 m->scalersSet[chain][p->index] = YES;
4330 #if defined (SSE_ENABLED)
4331 /*----------------------------------------------------------------
4333 | CondLikeScaler_Gen_SSE: general n-state model with or without rate
4336 -----------------------------------------------------------------*/
/* SSE version of the general scaler: conditional likelihoods are accessed as
   __m128 vectors and the loop runs over m->numSSEChars vectors.
   For each vector the element-wise maximum m1 over all gamma categories and
   states is stored into the node scalers (FLOATS_PER_VEC floats at a time)
   and used as the divisor; afterwards each node scaler scP[c] is replaced by
   its log and added to the site scaler lnScaler[c]. On completion
   m->scalersSet[chain][p->index] is set to YES.
   NOTE(review): the FAST_LOG lines reference "scaler", which is not among the
   declarations visible here, and use one "index" for every character -
   confirm that the FAST_LOG + SSE_ENABLED combination is supported.
   NOTE(review): brace, declaration, preprocessor else/endif and return lines
   are not visible in this view of the file. */
4337 int CondLikeScaler_Gen_SSE (TreeNode *p, int division, int chain)
4339 int c, k, n, nStates;
4340 CLFlt *scP, *lnScaler;
4341 __m128 *clPtr, **clP, m1;
4343 # if defined (FAST_LOG)
4347 m = &modelSettings[division];
4348 nStates = m->numModelStates;
4350 /* find conditional likelihood pointers */
4351 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one gamma category (numSSEChars * numModelStates vectors) per iteration */
4353 for (k=0; k<m->numGammaCats; k++)
4356 clPtr += m->numSSEChars * m->numModelStates;
4359 /* find node scalers */
4360 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4361 //scP_SSE = (__m128 *) scP;
4363 /* find site scalers */
4364 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4367 for (c=0; c<m->numSSEChars; c++)
/* m1 collects the element-wise maximum, starting from zero */
4370 m1 = _mm_setzero_ps ();
4371 for (k=0; k<m->numGammaCats; k++)
4373 for (n=0; n<nStates; n++)
4375 m1 = _mm_max_ps (m1, clP[k][n]);
/* write the maxima into the node scaler array and move on to the next group of floats */
4378 _mm_store_ps (scP, m1);
4379 scP += FLOATS_PER_VEC;
4381 # if defined (FAST_LOG)
4382 frexp (scaler, &index);
4384 scaler = scalerValue[index];
/* divide every vector of this character group by the maxima */
4386 for (k=0; k<m->numGammaCats; k++)
4388 for (n=0; n<nStates; n++)
4390 *clP[k] = _mm_div_ps (*clP[k], m1);
4396 /* Reset scP to original position*/
4397 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
/* convert the stored maxima to logs, one character at a time */
4398 for (c=0; c<m->numChars; c++)
4400 # if defined (FAST_LOG)
4401 scP[c] = logValue[index]; /* store node scaler */
4402 lnScaler[c] += scP[c]; /* add into tree scaler */
4404 scP[c] = (CLFlt) log (scP[c]); /* store node scaler */
4405 lnScaler[c] += scP[c]; /* add into tree scaler */
4409 m->scalersSet[chain][p->index] = YES;
4416 /*----------------------------------------------------------------
4418 | CondLikeScaler_Gen_GibbsGamma: general n-state model with Gibbs
4419 | sampling of rate categories in discrete gamma
4421 -----------------------------------------------------------------*/
/* Scaler for the Gibbs-sampled gamma case: each character has one rate
   category index, read from rateCat[c] (m->tiIndex offset by
   chain * m->numChars). Only characters with rateCat[c] < nGammaCats go
   through the scaler search and the store into scP[c] / lnScaler[c]; a
   visible comment says the other case does not add to lnScaler.
   On completion m->scalersSet[chain][p->index] is set to YES.
   Parameters: p is the node (asserted to have scalerNode == YES), division
   selects the entry of modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, preprocessor else/endif, the statements
   that update i and divide by the scaler, and the return lines are not
   visible in this view of the file. */
4422 int CondLikeScaler_Gen_GibbsGamma (TreeNode *p, int division, int chain)
4424 int c, i, j, n, nStates, *rateCat, nGammaCats;
4425 CLFlt scaler, *clP, *scP, *lnScaler;
4427 # if defined (FAST_LOG)
4431 assert (p->scalerNode == YES);
4433 m = &modelSettings[division];
4434 nStates = m->numModelStates;
4436 /* find conditional likelihood pointer */
4437 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
4439 /* flip node scalers */
4440 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4442 /* find site scalers */
4443 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4445 /* find rate category index and number of gamma categories */
4446 rateCat = m->tiIndex + chain * m->numChars;
4447 nGammaCats = m->numGammaCats;
4451 for (c=0; c<m->numChars; c++)
/* characters whose category index is not below nGammaCats take the other branch */
4453 if (rateCat[c] < nGammaCats)
4456 for (n=0; n<nStates; n++)
4458 if (clP[i] > scaler)
4463 # if defined (FAST_LOG)
/* frexp writes the binary exponent of scaler to index, which selects the tabulated scaler */
4464 frexp (scaler, &index);
4466 scaler = scalerValue[index];
4469 for (n=0; n<nStates; n++)
4472 # if defined (FAST_LOG)
4473 scP[c] = logValue[index]; /* store node scaler */
4474 lnScaler[c] += scP[c]; /* add into tree scaler */
/* the two lines below are the log()-based alternative to the table lookup above */
4476 scP[c] = (CLFlt) log (scaler); /* store node scaler */
4477 lnScaler[c] += scP[c]; /* add into tree scaler */
4484 /* no need to add it to the lnScaler */
4490 m->scalersSet[chain][p->index] = YES;
4496 /*----------------------------------------------------------------
4498 | CondLikeScaler_NUC4: 4by4 nucleotide model with or without rate
4501 -----------------------------------------------------------------*/
/* Four-state version of the scaler. For each character c the A, C, G and T
   values of every gamma category are compared against scaler, all four
   values per category are divided by scaler, and the log of the scaler is
   stored in scP[c] and added to lnScaler[c]. On completion
   m->scalersSet[chain][p->index] is set to YES.
   When FAST_LOG is defined the scaler is derived through frexp and the
   scalerValue/logValue lookup tables instead of calling log.
   Parameters: p is the node (asserted to have scalerNode == YES), division
   selects the entry of modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, preprocessor else/endif and return lines
   are not visible in this view of the file. */
4502 int CondLikeScaler_NUC4 (TreeNode *p, int division, int chain)
4505 CLFlt scaler, *scP, *lnScaler, *clPtr, **clP;
4508 # if defined (FAST_LOG)
4512 m = &modelSettings[division];
4513 assert (p->scalerNode == YES);
4515 /* find conditional likelihood pointers */
4516 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one gamma category (numChars * numModelStates values) per iteration */
4518 for (k=0; k<m->numGammaCats; k++)
4521 clPtr += m->numChars * m->numModelStates;
4524 /* find node scalers */
4525 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4527 /* find site scalers */
4528 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4530 /* rescale values */
4531 for (c=0; c<m->numChars; c++)
/* compare each of the four states in every category against the current scaler */
4534 for (k=0; k<m->numGammaCats; k++)
4536 if (clP[k][A] > scaler)
4538 if (clP[k][C] > scaler)
4540 if (clP[k][G] > scaler)
4542 if (clP[k][T] > scaler)
4546 # if defined (FAST_LOG)
/* frexp writes the binary exponent of scaler to index, which selects the tabulated scaler */
4547 frexp (scaler, &index);
4549 scaler = scalerValue[index];
/* divide all four states of every category by the scaler */
4551 for (k=0; k<m->numGammaCats; k++)
4553 clP[k][A] /= scaler;
4554 clP[k][C] /= scaler;
4555 clP[k][G] /= scaler;
4556 clP[k][T] /= scaler;
4560 # if defined (FAST_LOG)
4561 scP[c] = logValue[index]; /* store node scaler */
4562 lnScaler[c] += scP[c]; /* add into tree scaler */
/* the two lines below are the log()-based alternative to the table lookup above */
4564 scP[c] = (CLFlt) log(scaler); /* store node scaler */
4565 lnScaler[c] += scP[c]; /* add into tree scaler */
4569 m->scalersSet[chain][p->index] = YES; /* set flag marking scalers set */
4575 #if defined (SSE_ENABLED)
4576 /*----------------------------------------------------------------
4578 | CondLikeScaler_NUC4_SSE: 4by4 nucleotide model with or without rate
4579 | variation using SSE code
4581 -----------------------------------------------------------------*/
/* SSE version of the four-state scaler: conditional likelihoods are accessed
   as __m128 vectors and the main loop runs over m->numSSEChars vectors.
   For each vector the element-wise maximum m1 over the A, C, G, T entries of
   all gamma categories is used as the divisor. After the vector loop each
   node scaler scP[c] is replaced by its log, which is also added to the site
   scaler lnScaler[c]. On completion m->scalersSet[chain][p->index] is set to
   YES.
   Parameters: p is the node (asserted to have scalerNode == YES), division
   selects the entry of modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, the store of m1 into the node scalers,
   the pointer advances and the return lines are not visible in this view. */
4582 int CondLikeScaler_NUC4_SSE (TreeNode *p, int division, int chain)
4585 CLFlt *scP, *lnScaler;
4586 __m128 *clPtr, **clP, *scP_SSE, m1;
4589 m = &modelSettings[division];
4590 assert (p->scalerNode == YES);
4592 /* find conditional likelihood pointers */
4593 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one gamma category (numSSEChars * numModelStates vectors) per iteration */
4595 for (k=0; k<m->numGammaCats; k++)
4598 clPtr += m->numSSEChars * m->numModelStates;
4601 /* find node scalers */
4602 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
/* vector view of the same node scaler array */
4603 scP_SSE = (__m128 *) scP;
4605 /* find site scalers */
4606 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4609 for (c=0; c<m->numSSEChars; c++)
/* m1 collects the element-wise maximum, starting from zero */
4611 m1 = _mm_setzero_ps ();
4612 for (k=0; k<m->numGammaCats; k++)
4614 m1 = _mm_max_ps (m1, clP[k][A]);
4615 m1 = _mm_max_ps (m1, clP[k][C]);
4616 m1 = _mm_max_ps (m1, clP[k][G]);
4617 m1 = _mm_max_ps (m1, clP[k][T]);
/* four divisions per category, one per state; the pointer advances between them are not visible here */
4620 for (k=0; k<m->numGammaCats; k++)
4622 *clP[k] = _mm_div_ps (*clP[k], m1);
4624 *clP[k] = _mm_div_ps (*clP[k], m1);
4626 *clP[k] = _mm_div_ps (*clP[k], m1);
4628 *clP[k] = _mm_div_ps (*clP[k], m1);
4635 /* update site scalers */
4636 for (c=0; c<m->numChars; c++)
4637 lnScaler[c] += (scP[c] = (CLFlt)(log (scP[c]))); /* add log of new scaler into tree scaler */
4639 m->scalersSet[chain][p->index] = YES; /* set flag marking scalers set */
4647 /*----------------------------------------------------------------
4649 | CondLikeScaler_NUC4_GibbsGamma: 4by4 nucleotide model with rate
4650 | variation approximated by Gibbs sampling from gamma
4652 -----------------------------------------------------------------*/
/* Four-state scaler for the Gibbs-sampled gamma case: each character has one
   rate category index, read from rateCat[c] (m->tiIndex offset by
   chain * m->numChars). Characters with rateCat[c] < nGammaCats go through
   the scaler search and the store of its log into scP[c] / lnScaler[c]; the
   other visible store sets scP[c] to 0.0 without touching lnScaler.
   On completion m->scalersSet[chain][p->index] is set to YES.
   Parameters: p is the node (asserted to have scalerNode == YES), division
   selects the entry of modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, else, preprocessor else/endif, the
   statements that update i and divide by the scaler, and the return lines
   are not visible in this view of the file. */
4653 int CondLikeScaler_NUC4_GibbsGamma (TreeNode *p, int division, int chain)
4655 int c, i, j, nGammaCats, *rateCat;
4656 CLFlt scaler, *clP, *scP, *lnScaler;
4659 # if defined (FAST_LOG)
4663 assert (p->scalerNode == YES);
4665 m = &modelSettings[division];
4667 /* find conditional likelihood pointer */
4668 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
4670 /* find node scalers */
4671 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4673 /* find site scalers */
4674 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4676 /* find rate category index and number of gamma categories */
4677 rateCat = m->tiIndex + chain * m->numChars;
4678 nGammaCats = m->numGammaCats;
4682 for (c=0; c<m->numChars; c++)
/* characters whose category index is not below nGammaCats take the other branch */
4684 if (rateCat[c] < nGammaCats)
/* four comparisons, one per state; the updates of i between them are not visible here */
4687 if (clP[i] > scaler)
4690 if (clP[i] > scaler)
4693 if (clP[i] > scaler)
4696 if (clP[i] > scaler)
4700 # if defined (FAST_LOG)
/* frexp writes the binary exponent of scaler to index, which selects the tabulated scaler */
4701 frexp (scaler, &index);
4703 scaler = scalerValue[index];
4711 # if defined (FAST_LOG)
4712 scP[c] = logValue[index]; /* store node scaler */
4713 lnScaler[c] += scP[c]; /* add into tree scaler */
/* the two lines below are the log()-based alternative to the table lookup above */
4715 scP[c] = (CLFlt) log (scaler); /* store node scaler */
4716 lnScaler[c] += scP[c]; /* add into tree scaler */
4721 scP[c] = 0.0; /* store node scaler */
4722 /* no need to add it to the lnScaler */
4728 m->scalersSet[chain][p->index] = YES;
4734 #if !defined (SSE_ENABLED) || 1
4735 /*----------------------------------------------------------------
4737 | CondLikeScaler_NY98: codon model with omega variation
4739 -----------------------------------------------------------------*/
/* Same scheme as the general scaler, but the category loops run over
   m->numOmegaCats instead of the gamma categories. For each character c the
   values clP[k][n] are compared against scaler, every value is divided by
   scaler, and the log of the scaler is stored in scP[c] and added to
   lnScaler[c]. On completion m->scalersSet[chain][p->index] is set to YES.
   Parameters: p is the node to rescale, division selects the entry of
   modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, preprocessor else/endif and return lines
   are not visible in this view of the file. */
4740 int CondLikeScaler_NY98 (TreeNode *p, int division, int chain)
4742 int c, k, n, nStates;
4743 CLFlt scaler, **clP, *clPtr, *scP, *lnScaler;
4745 # if defined (FAST_LOG)
4749 m = &modelSettings[division];
4750 nStates = m->numModelStates;
4752 /* find conditional likelihood pointers */
4753 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one omega category (numChars * numModelStates values) per iteration */
4755 for (k=0; k<m->numOmegaCats; k++)
4758 clPtr += m->numChars * m->numModelStates;
4761 /* find node scalers */
4762 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4764 /* find site scalers */
4765 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4768 for (c=0; c<m->numChars; c++)
/* search across omega categories and states for values exceeding the current scaler */
4771 for (k=0; k<m->numOmegaCats; k++)
4773 for (n=0; n<nStates; n++)
4775 if (clP[k][n] > scaler)
4780 # if defined (FAST_LOG)
/* frexp writes the binary exponent of scaler to index, which selects the tabulated scaler */
4781 frexp (scaler, &index);
4783 scaler = scalerValue[index];
/* divide every value of this character by the scaler */
4785 for (k=0; k<m->numOmegaCats; k++)
4787 for (n=0; n<nStates; n++)
4789 clP[k][n] /= scaler;
4794 # if defined (FAST_LOG)
4795 scP[c] = logValue[index]; /* store node scaler */
4796 lnScaler[c] += scP[c]; /* add into tree scaler */
/* the two lines below are the log()-based alternative to the table lookup above */
4798 scP[c] = (CLFlt) log (scaler); /* store node scaler */
4799 lnScaler[c] += scP[c]; /* add into tree scaler */
4803 m->scalersSet[chain][p->index] = YES;
4810 #if defined (SSE_ENABLED)
4811 /*----------------------------------------------------------------
4813 | CondLikeScaler_NY98_SSE: codon model with omega variation
4815 -----------------------------------------------------------------*/
/* SSE version of the omega-category scaler: conditional likelihoods are
   accessed as __m128 vectors and the main loop runs over m->numSSEChars
   vectors. For each vector the element-wise maximum m1 over all omega
   categories and states is stored into the node scalers (FLOATS_PER_VEC
   floats at a time) and used as the divisor; afterwards each node scaler
   scP[c] is replaced by its log and added to the site scaler lnScaler[c].
   On completion m->scalersSet[chain][p->index] is set to YES.
   NOTE(review): the FAST_LOG lines reference "scaler", which is not among the
   declarations visible here, and use one "index" for every character -
   confirm that the FAST_LOG + SSE_ENABLED combination is supported.
   NOTE(review): brace, declaration, preprocessor else/endif and return lines
   are not visible in this view of the file. */
4816 int CondLikeScaler_NY98_SSE (TreeNode *p, int division, int chain)
4818 int c, k, n, nStates;
4819 CLFlt *scP, *lnScaler;
4820 __m128 *clPtr, **clP, m1;
4822 # if defined (FAST_LOG)
4826 m = &modelSettings[division];
4827 nStates = m->numModelStates;
4829 /* find conditional likelihood pointers */
4830 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one omega category (numSSEChars * numModelStates vectors) per iteration */
4832 for (k=0; k<m->numOmegaCats; k++)
4835 clPtr += m->numSSEChars * m->numModelStates;
4838 /* find node scalers */
4839 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4840 //scP_SSE = (__m128 *) scP;
4842 /* find site scalers */
4843 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4846 for (c=0; c<m->numSSEChars; c++)
/* m1 collects the element-wise maximum, starting from zero */
4849 m1 = _mm_setzero_ps ();
4850 for (k=0; k<m->numOmegaCats; k++)
4852 for (n=0; n<nStates; n++)
4854 m1 = _mm_max_ps (m1, clP[k][n]);
/* write the maxima into the node scaler array and move on to the next group of floats */
4857 _mm_store_ps (scP, m1);
4858 scP += FLOATS_PER_VEC;
4860 # if defined (FAST_LOG)
4861 frexp (scaler, &index);
4863 scaler = scalerValue[index];
/* divide every vector of this character group by the maxima */
4865 for (k=0; k<m->numOmegaCats; k++)
4867 for (n=0; n<nStates; n++)
4869 *clP[k] = _mm_div_ps (*clP[k], m1);
4875 /* Reset scP to original position*/
4876 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
/* convert the stored maxima to logs, one character at a time */
4877 for (c=0; c<m->numChars; c++)
4879 # if defined (FAST_LOG)
4880 scP[c] = logValue[index]; /* store node scaler */
4881 lnScaler[c] += scP[c]; /* add into tree scaler */
4883 scP[c] = (CLFlt) log (scP[c]); /* store node scaler */
4884 lnScaler[c] += scP[c]; /* add into tree scaler */
4888 m->scalersSet[chain][p->index] = YES;
4895 /*----------------------------------------------------------------
4897 | CondLikeScaler_Std: variable states model with or without
4900 -----------------------------------------------------------------*/
/* Scaler for characters with a per-character number of states.
   numReps accumulates, over all characters, m->numBetaCats * 2 for
   two-state characters and m->nStates[c] otherwise. In the main loop the
   number of values per character is m->nStates[c] or m->numBetaCats * 2; the
   values clP[k][n] are compared against scaler, divided by it, and the log
   of the scaler is stored in scP[c] and added to lnScaler[c]. On completion
   m->scalersSet[chain][p->index] is set to YES.
   Parameters: p is the node (asserted to have scalerNode == YES), division
   selects the entry of modelSettings, chain selects the per-chain tables.
   NOTE(review): brace, declaration, else, preprocessor else/endif, the
   initialisation of numReps, the setup of clP and the return lines are not
   visible in this view of the file. */
4901 int CondLikeScaler_Std (TreeNode *p, int division, int chain)
4903 int c, n, k, nStates, numReps;
4904 CLFlt scaler, *clPtr, **clP, *scP, *lnScaler;
4906 # if defined (FAST_LOG)
4910 assert (p->scalerNode == YES);
4912 m = &modelSettings[division];
/* count the values held per gamma category */
4915 for (c=0; c<m->numChars; c++)
4917 if (m->nStates[c] == 2)
4918 numReps += m->numBetaCats * 2;
4920 numReps += m->nStates[c];
4923 /* find conditional likelihood pointers */
4924 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
4926 for (k=0; k<m->numGammaCats; k++)
4932 /* find node scalers */
4933 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
4935 /* find site scalers */
4936 lnScaler = m->scalers[m->siteScalerIndex[chain]];
4939 for (c=0; c<m->numChars; c++)
/* NOTE(review): the condition choosing between the next two assignments is on lines not visible here */
4942 nStates = m->nStates[c];
4944 nStates = m->numBetaCats * 2;
/* search across categories and values for entries exceeding the current scaler */
4946 for (k=0; k<m->numGammaCats; k++)
4948 for (n=0; n<nStates; n++)
4950 if (clP[k][n] > scaler)
4955 # if defined (FAST_LOG)
/* frexp writes the binary exponent of scaler to index, which selects the tabulated scaler */
4956 frexp (scaler, &index);
4958 scaler = scalerValue[index];
/* divide every value of this character by the scaler */
4960 for (k=0; k<m->numGammaCats; k++)
4962 for (n=0; n<nStates; n++)
4963 clP[k][n] /= scaler;
4967 # if defined (FAST_LOG)
4968 scP[c] = logValue[index]; /* store node scaler */
4969 lnScaler[c] += scP[c]; /* add into tree scaler */
/* the two lines below are the log()-based alternative to the table lookup above */
4971 scP[c] = (CLFlt) log (scaler); /* store node scaler */
4972 lnScaler[c] += scP[c]; /* add into tree scaler */
4976 m->scalersSet[chain][p->index] = YES;
4982 /*------------------------------------------------------------------
4984 | Likelihood_Adgamma: all n-state models with autocorrelated
4985 | discrete gamma rate variation, NOT morph, restriction,
4986 | codon or doublet models; just fill in rateProbs
4988 -------------------------------------------------------------------*/
/* Parameters: p is the node whose conditional likelihoods are read, division
   selects the entry of modelSettings, chain selects the per-chain tables,
   lnL is the caller's log-likelihood variable (a comment near the end says
   it is reset here and the likelihood is calculated later), whichSitePats is
   discussed in the NOTE inside the function.
   For every character and gamma category the conditional likelihoods are
   summed against the state frequencies bs; rP points at the rate
   probabilities for this chain and state.
   NOTE(review): covBF has 40 entries and is written up to index
   nStates - 1, so this assumes m->numModelStates <= 40 - confirm.
   NOTE(review): brace, declaration, the stores into rP, the assignment of
   s01/s10 and the return lines are not visible in this view of the file. */
4989 int Likelihood_Adgamma (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
4991 int c, j, k, i, nStates, nStatesDiv2;
4992 MrBFlt *bs, *swr, s01, s10, probOn, probOff, covBF[40];
4997 /* NOTE: whichSitePats offsets numSitesOfPat by whichSitePats X numCompressedChars.
4998 This is done so we can use the character reweighting scheme for "heating" chains. This was easy to
4999 accomplish for all of the models except this one, which doesn't use numSitesOfPat when calculating
5000 likelihoods. Either we disallow autocorrelated rates when using MCMC with character reweighting, or
5001 we properly calculate likelihoods when some site patterns have increased or decreased weight. For
5002 now, we do not allow MCMCMC with character reweighting with this HMM; we bail out in the function
5003 FillNumSitesOfPat if we have Adgamma rate variation and reweighting. */
5006 /* find model settings */
5007 m = &modelSettings[division];
5009 /* get the number of states */
5010 nStates = m->numModelStates;
5011 nStatesDiv2 = nStates / 2;
5013 /* find base frequencies */
5014 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5016 /* find conditional likelihood pointer */
5017 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
5019 /* find pointer to rate probabilities */
5020 rP = rateProbs[chain] + state[chain] * rateProbRowSize + m->rateProbStart;
5022 /* loop over characters and calculate rate probs */
/* when switch rates are present, frequencies are split into an "on" half and an "off" half */
5023 if (m->switchRates != NULL)
5025 swr = GetParamVals (m->switchRates, chain, state[chain]);
5028 probOn = s01 / (s01 + s10);
5029 probOff = 1.0 - probOn;
5030 for (j=0; j<nStatesDiv2; j++)
5032 covBF[j] = bs[j] * probOn;
5033 covBF[j+nStatesDiv2] = bs[j] * probOff;
5038 for (c=i=0; c<m->numChars; c++)
5040 for (k=0; k<m->numGammaCats; k++)
/* clP is consumed sequentially: one value per state, category and character */
5043 for (j=0; j<nStates; j++)
5044 like += (*(clP++)) * bs[j];
5049 /* reset lnL, likelihood calculated later for this model */
5056 /*------------------------------------------------------------------
5058 | Likelihood_Gen: general n-state models with or without rate
5061 -------------------------------------------------------------------*/
/* Accumulates into *lnL the log likelihood of one division, read from the
   conditional likelihoods stored for node p.
   Parameters: p is the node, division selects the entry of modelSettings,
   chain selects the per-chain tables, lnL is the accumulator, whichSitePats
   offsets numSitesOfPat by whichSitePats * numCompressedChars.
   Without an invariable-sites parameter each character contributes
   (lnScaler[c] + log(like)) * nSitesOfPat[c]. With one, likeI is built from
   m->invCondLikes, bs and pInvar and combined with like; the case
   lnScaler[c] < -200.0 is handled separately because the scaling factor
   cannot be exponentiated.
   If like falls below LIKE_EPSILON, *lnL is set to MRBFLT_NEG_MAX.
   NOTE(review): covBF has 40 entries and is written up to index
   nStates - 1, so this assumes m->numModelStates <= 40 - confirm.
   NOTE(review): brace, declaration, else, the assignment of s01/s10, the
   conditions inside the -200 branch and the return lines are not visible in
   this view of the file. */
5062 int Likelihood_Gen (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5064 int c, j, k, nStates, hasPInvar;
5065 MrBFlt s01, s10, probOn, probOff, *swr;
5066 MrBFlt covBF[40], freq, *bs, like, likeI, pInvar=0.0, lnLike;
5067 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat, *clInvar=NULL;
5070 /* find model settings and nStates, pInvar, invar cond likes */
5071 m = &modelSettings[division];
5072 nStates = m->numModelStates;
5073 if (m->pInvar == NULL)
5080 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
5081 clInvar = m->invCondLikes;
5084 /* find conditional likelihood pointers */
5085 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* clPtr advances by one gamma category (numChars * numModelStates values) per iteration */
5087 for (k=0; k<m->numGammaCats; k++)
5090 clPtr += m->numChars * m->numModelStates;
5094 /* find base frequencies */
5095 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5097 /* if covarion model, adjust base frequencies */
5098 if (m->switchRates != NULL)
5100 /* find the stationary frequencies */
5101 swr = GetParamVals(m->switchRates, chain, state[chain]);
5104 probOn = s01 / (s01 + s10);
5105 probOff = 1.0 - probOn;
5107 /* now adjust the base frequencies; on-state stored first in cond likes */
5108 for (j=0; j<nStates/2; j++)
5110 covBF[j] = bs[j] * probOn;
5111 covBF[j+nStates/2] = bs[j] * probOff;
5114 /* finally set bs pointer to adjusted values */
5118 /* find category frequencies */
/* each gamma category gets an equal share; with invariable sites the share is scaled by (1 - pInvar) */
5119 if (hasPInvar == NO)
5120 freq = 1.0 / m->numGammaCats;
5122 freq = (1.0 - pInvar) / m->numGammaCats;
5124 /* find site scaler */
5125 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5127 /* find nSitesOfPat */
5128 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5133 /* loop over characters */
5134 if (hasPInvar == NO)
5136 for (c=0; c<m->numChars; c++)
5139 for (k=0; k<m->numGammaCats; k++)
5140 for (j=0; j<nStates; j++)
/* each category pointer is consumed sequentially, one value per state */
5142 like += (*(clP[k]++)) * bs[j];
5143 # ifdef DEBUG_LIKELIHOOD
5144 // printf ("char=%d cat=%d j=%d like %E\n",c, k,j,like);
5149 /* check against LIKE_EPSILON (values close to zero are problematic) */
5150 if (like < LIKE_EPSILON)
5152 # ifdef DEBUG_LIKELIHOOD
5153 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5155 (*lnL) = MRBFLT_NEG_MAX;
/* the site log likelihood is weighted by the number of sites sharing this pattern */
5161 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5167 /* has invariable category */
5168 for (c=0; c<m->numChars; c++)
5171 for (k=0; k<m->numGammaCats; k++)
5172 for (j=0; j<nStates; j++)
5174 like += (*(clP[k]++)) * bs[j];
/* invariable-site term: weighted by the state frequency and by pInvar */
5177 for (j=0; j<nStates; j++)
5178 likeI += (*(clInvar++)) * bs[j] * pInvar;
5179 if (lnScaler[c] < -200.0)
5181 /* we are not going to be able to exponentiate the scaling factor */
5184 /* forget about like; it is going to be insignificant compared to likeI */
5185 lnLike = log(likeI);
5189 /* treat likeI as if 0.0, that is, ignore it completely */
5190 lnLike = log(like) + lnScaler[c];
/* general case: likeI is brought onto the scale of like before the two are added */
5194 lnLike = log (like + (likeI / exp (lnScaler[c]))) + lnScaler[c];
5196 /* check against LIKE_EPSILON (values close to zero are problematic) */
/* NOTE(review): the test uses like alone, not the combined like/likeI value - confirm this is intended */
5197 if (like < LIKE_EPSILON)
5199 # ifdef DEBUG_LIKELIHOOD
5200 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5202 (*lnL) = MRBFLT_NEG_MAX;
5208 (*lnL) += lnLike * nSitesOfPat[c];
5217 #if defined (SSE_ENABLED)
5219 //CLFlt DeleteME[1000];
5220 //int PrintOld_SSE (TreeNode *p, int division, int chain){
5222 // int c, c1, j, k, nStates;
5223 // //MrBFlt *swr, likeI, pInvar=0.0, lnLike;
5224 // CLFlt *temp_vector;
5225 // __m128 *clPtr, **clP;
5228 // m = &modelSettings[division];
5229 // nStates = m->numModelStates;
5230 // /* find conditional likelihood pointers */
5232 // temp_vector = DeleteME;
5234 // clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
5235 // clP = m->clP_SSE;
5236 // for (k=0; k<m->numGammaCats; k++)
5239 // clPtr += m->numSSEChars * m->numModelStates;
5242 // for (c=0; c<m->numChars; c++)
5244 // c1 = c / FLOATS_PER_VEC;
5245 // for (k=0; k<m->numGammaCats; k++)
5247 // for (j=0; j<nStates; j++)
5249 // *temp_vector++ = *(((CLFlt*)&clP[k][c1*nStates+j])+c % FLOATS_PER_VEC);
5253 // temp_vector=DeleteME;
5260 /*------------------------------------------------------------------
5262 | Likelihood_Gen_SSE: general n-state model with or without rate
5265 -------------------------------------------------------------------*/
5266 int Likelihood_Gen_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5268 int c, j, k, nStates, hasPInvar;
5270 MrBFlt s01, s10, probOn, probOff, *swr, covBF[40], freq, likeI, pInvar=0.0, lnLike;
5271 CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE, *lnLI_SSE;
5272 __m128 *clPtr, **clP, *clInvar=NULL;
5273 __m128 m1, mCatLike, mLike, mFreq;
5276 /* find model settings and nStates, pInvar, invar cond likes */
5277 m = &modelSettings[division];
5278 nStates = m->numModelStates;
5279 if (m->pInvar == NULL)
5286 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
5287 clInvar = (__m128 *) (m->invCondLikes);
5290 /* find conditional likelihood pointers */
5291 clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
5293 for (k=0; k<m->numGammaCats; k++)
5296 clPtr += m->numSSEChars * m->numModelStates;
5298 lnL_SSE = m->lnL_SSE;
5299 lnLI_SSE = m->lnLI_SSE;
5301 /* find base frequencies */
5302 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5304 /* if covarion model, adjust base frequencies */
5305 if (m->switchRates != NULL)
5307 /* find the stationary frequencies */
5308 swr = GetParamVals(m->switchRates, chain, state[chain]);
5311 probOn = s01 / (s01 + s10);
5312 probOff = 1.0 - probOn;
5314 /* now adjust the base frequencies; on-state stored first in cond likes */
5315 for (j=0; j<nStates/2; j++)
5317 covBF[j] = bs[j] * probOn;
5318 covBF[j+nStates/2] = bs[j] * probOff;
5321 /* finally set bs pointer to adjusted values */
5325 /* find category frequencies */
5326 if (hasPInvar == NO)
5327 freq = 1.0 / m->numGammaCats;
5329 freq = (1.0 - pInvar) / m->numGammaCats;
5331 mFreq = _mm_set1_ps ((CLFlt)(freq));
5333 /* find site scaler */
5334 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5336 /* find nSitesOfPat */
5337 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5342 for (c=0; c<m->numSSEChars; c++)
5344 mLike = _mm_setzero_ps ();
5345 for (k=0; k<m->numGammaCats; k++)
5347 mCatLike = _mm_setzero_ps ();
5348 for (j=0; j<nStates; j++)
5350 m1 = _mm_mul_ps (clP[k][j], _mm_set1_ps ((CLFlt)bs[j]));
5351 mCatLike = _mm_add_ps (mCatLike, m1);
5353 m1 = _mm_mul_ps (mCatLike, mFreq);
5354 mLike = _mm_add_ps (mLike, m1);
5357 _mm_store_ps (lnL_SSE, mLike);
5358 lnL_SSE += FLOATS_PER_VEC;
5361 /* loop over characters */
5362 if (hasPInvar == NO)
5364 for (c=0; c<m->numChars; c++)
5366 like = m->lnL_SSE[c];
5367 /* check against LIKE_EPSILON (values close to zero are problematic) */
5368 if (like < LIKE_EPSILON)
5370 # ifdef DEBUG_LIKELIHOOD
5371 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5373 (*lnL) = MRBFLT_NEG_MAX;
5379 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5385 /* has invariable category */
5386 for (c=0; c<m->numSSEChars; c++)
5388 mCatLike = _mm_setzero_ps ();
5389 for (j=0; j<nStates; j++)
5391 m1 = _mm_mul_ps (clInvar[j], _mm_set1_ps ((CLFlt)bs[j]));
5392 mCatLike = _mm_add_ps (mCatLike, m1);
5395 _mm_store_ps (lnL_SSE, mCatLike);
5396 lnLI_SSE += FLOATS_PER_VEC;
5399 for (c=0; c<m->numChars; c++)
5401 like = m->lnL_SSE[c];
5402 likeI = m->lnLI_SSE[c];
5403 if (lnScaler[c] < -200.0)
5405 /* we are not going to be able to exponentiate the scaling factor */
5408 /* forget about like; it is going to be insignificant compared to likeI */
5409 lnLike = log(likeI);
5413 /* treat likeI as if 0.0, that is, ignore it completely */
5414 lnLike = log(like) + lnScaler[c];
5418 lnLike = log (like + (likeI / exp (lnScaler[c]))) + lnScaler[c];
5420 /* check against LIKE_EPSILON (values close to zero are problematic) */
5421 if (like < LIKE_EPSILON)
5423 # ifdef DEBUG_LIKELIHOOD
5424 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5426 (*lnL) = MRBFLT_NEG_MAX;
5432 (*lnL) += lnLike * nSitesOfPat[c];
5443 /*------------------------------------------------------------------
5445 | Likelihood_Gen_GibbsGamma: general n-state models using
5446 | Gibbs resampling of discrete gamma rate categories
|
| For every character, the site likelihood is the sum over states of
| the conditional likelihood at node p times the state frequency.
| There is no loop over rate categories here: the cond likes are
| read sequentially, nStates values per character.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            each character adds
|                (lnScaler + log(like)) * nSitesOfPat to *lnL;
|                *lnL is set to MRBFLT_NEG_MAX when a site
|                likelihood falls below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
5448 -------------------------------------------------------------------*/
5449 int Likelihood_Gen_GibbsGamma (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5451 int c, j, nStates, nGammaCats, *rateCat;
5452 MrBFlt s01, s10, probOn, probOff, *swr;
/* covBF receives the covarion-adjusted frequencies and is written at indices
   0 .. nStates-1 below, so nStates must not exceed 40 when switchRates is set */
5453 MrBFlt covBF[40], *bs, like;
5454 CLFlt *clP, *lnScaler, *nSitesOfPat, *clInvar=NULL;
5457 /* find model settings, nStates and invar cond likes */
5458 m = &modelSettings[division];
5459 nStates = m->numModelStates;
5460 clInvar = m->invCondLikes;
5462 /* find conditional likelihood pointer */
5463 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
5465 /* find base frequencies */
5466 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5468 /* if covarion model, adjust base frequencies */
5469 if (m->switchRates != NULL)
5471 /* find the stationary frequencies */
5472 swr = GetParamVals(m->switchRates, chain, state[chain]);
/* NOTE(review): s01 and s10 are presumably read from swr -- confirm */
5475 probOn = s01 / (s01 + s10);
5476 probOff = 1.0 - probOn;
5478 /* now adjust the base frequencies; on-state stored first in cond likes */
/* bs holds nStates/2 frequencies; each is split into an "on" entry (first half
   of covBF) and an "off" entry (second half of covBF) */
5479 for (j=0; j<nStates/2; j++)
5481 covBF[j] = bs[j] * probOn;
5482 covBF[j+nStates/2] = bs[j] * probOff;
5485 /* finally set bs pointer to adjusted values */
5489 /* find site scaler */
5490 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5492 /* find nSitesOfPat */
5493 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5495 /* find rate category index and number of gamma categories */
/* rateCat is this chain's slice (numChars entries) of m->tiIndex */
5496 rateCat = m->tiIndex + chain * m->numChars;
5497 nGammaCats = m->numGammaCats;
5502 /* loop over characters */
5503 if (m->pInvar == NULL)
5505 for (c=0; c<m->numChars; c++)
5508 for (j=0; j<nStates; j++)
5510 like += (*(clP++)) * bs[j];
5511 # ifdef DEBUG_LIKELIHOOD
5512 // printf ("char=%d cat=%d j=%d like %E\n",c, k,j,like);
5516 /* check against LIKE_EPSILON (values close to zero are problematic) */
5517 if (like < LIKE_EPSILON)
5519 # ifdef DEBUG_LIKELIHOOD
5520 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5522 (*lnL) = MRBFLT_NEG_MAX;
/* lnScaler[c] is added in log space; the sum is weighted by the number of sites with this pattern */
5528 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5534 /* has invariable category */
5535 for (c=0; c<m->numChars; c++)
/* a rate category index below nGammaCats selects the node's cond likes;
   the second loop below reads the invariable-site cond likes instead
   (presumably the branch for rateCat[c] >= nGammaCats -- confirm) */
5538 if (rateCat[c] < nGammaCats)
5540 for (j=0; j<nStates; j++)
5541 like += (*(clP++)) * bs[j];
5546 for (j=0; j<nStates; j++)
5547 like += (*(clInvar++)) * bs[j];
5551 /* check against LIKE_EPSILON (values close to zero are problematic) */
5552 if (like < LIKE_EPSILON)
5554 # ifdef DEBUG_LIKELIHOOD
5555 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5557 (*lnL) = MRBFLT_NEG_MAX;
5563 (*lnL) += (log(like) + lnScaler[c]) * nSitesOfPat[c];
5572 /*------------------------------------------------------------------
5574 | Likelihood_NUC4: 4by4 nucleotide models with or without rate
| variation (loop over m->numGammaCats categories) and with or
| without a proportion of invariable sites (m->pInvar)
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            each character adds
|                (lnScaler + log(like)) * nSitesOfPat to *lnL;
|                *lnL is set to MRBFLT_NEG_MAX when a site
|                likelihood falls below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
5577 -------------------------------------------------------------------*/
5578 int Likelihood_NUC4 (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5580 int c, k, hasPInvar;
5581 MrBFlt freq, likeI, *bs, like, pInvar=0.0;
5582 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat, *clInvar=NULL;
5585 # if defined (FAST_LOG)
5587 MrBFlt likeAdjust = 1.0, f;
5590 /* find model settings and pInvar, invar cond likes */
5591 m = &modelSettings[division];
5592 if (m->pInvar == NULL)
5599 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
5600 clInvar = m->invCondLikes;
5603 /* find conditional likelihood pointers */
5604 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* the cond likes of one rate category span numChars * numModelStates values;
   clPtr is stepped by that amount once per category */
5606 for (k=0; k<m->numGammaCats; k++)
5609 clPtr += m->numChars * m->numModelStates;
5612 /* find base frequencies */
5613 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5615 /* find category frequencies */
/* equal weight per gamma category; with invariable sites the gamma categories
   share the remaining (1 - pInvar) probability mass */
5616 if (hasPInvar == NO)
5617 freq = 1.0 / m->numGammaCats;
5619 freq = (1.0 - pInvar) / m->numGammaCats;
5621 /* find tree scaler */
5622 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5624 /* find nSitesOfPat */
5625 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5630 /* loop over characters */
5631 if (hasPInvar == NO)
5633 for (c=0; c<m->numChars; c++)
5636 for (k=0; k<m->numGammaCats; k++)
5638 like += (clP[k][A] * bs[A] + clP[k][C] * bs[C] + clP[k][G] * bs[G] + clP[k][T] * bs[T]);
5643 /* check against LIKE_EPSILON (values close to zero are problematic) */
5644 if (like < LIKE_EPSILON)
5646 # ifdef DEBUG_LIKELIHOOD
5647 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5649 (*lnL) = MRBFLT_NEG_MAX;
5655 # if defined (FAST_LOG)
/* FAST_LOG path: frexp splits like into a fraction f and a binary exponent
   index; the exponent part is added through the logValue table here, and
   log (likeAdjust) is added once at the end of the function.
   NOTE(review): the loop below presumably folds f into likeAdjust once per
   site -- confirm. */
5656 f = frexp (like, &index);
5658 (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
5659 for (k=0; k<(int)nSitesOfPat[c]; k++)
5662 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5669 /* has invariable category */
5670 for (c=0; c<m->numChars; c++)
5673 for (k=0; k<m->numGammaCats; k++)
5675 like += (clP[k][A] * bs[A] + clP[k][C] * bs[C] + clP[k][G] * bs[G] + clP[k][T] * bs[T]);
/* likelihood of the invariable category, already weighted by pInvar */
5679 likeI = (clInvar[A] * bs[A] + clInvar[C] * bs[C] + clInvar[G] * bs[G] + clInvar[T] * bs[T]) * pInvar;
5680 if (lnScaler[c] < -200)
5682 /* we are not going to be able to exponentiate the scaling factor */
5685 /* forget about like; it is going to be insignificant compared to likeI */
5690 /* treat likeI as if 0.0, that is, ignore it completely */
/* dividing likeI by exp (lnScaler[c]) puts it on the same scale as like,
   because lnScaler[c] is added back in log space when *lnL is updated below */
5694 like = like + (likeI / exp (lnScaler[c]));
5698 /* check against LIKE_EPSILON (values close to zero are problematic) */
5699 if (like < LIKE_EPSILON)
5701 # ifdef DEBUG_LIKELIHOOD
5702 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5704 (*lnL) = MRBFLT_NEG_MAX;
5710 # if defined (FAST_LOG)
5711 f = frexp (like, &index);
5713 (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
5714 for (k=0; k<(int)nSitesOfPat[c]; k++)
5717 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5723 # if defined (FAST_LOG)
/* add the log of the accumulated fraction product in a single log call */
5724 (*lnL) += log (likeAdjust);
5731 /*------------------------------------------------------------------
5733 | Likelihood_NUC4_GibbsGamma: 4by4 nucleotide models with rate
5734 | variation using Gibbs sampling from gamma rate categories
|
| The site likelihood is a single four-term sum (A, C, G, T) of
| conditional likelihood times base frequency; no sum over rate
| categories is taken here. When m->pInvar is set, the sum is taken
| either over the node's cond likes or over the invariable-site
| cond likes.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            each character adds
|                (lnScaler + log(like)) * nSitesOfPat to *lnL;
|                *lnL is set to MRBFLT_NEG_MAX when a site
|                likelihood falls below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
5736 -------------------------------------------------------------------*/
5737 int Likelihood_NUC4_GibbsGamma (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5739 int c, i, r, nGammaCats, *rateCat;
5741 CLFlt *clP, *lnScaler, *nSitesOfPat, *clInvar;
5744 # if defined (FAST_LOG)
5746 MrBFlt likeAdjust = 1.0, f;
5749 /* find model settings and invar cond likes */
5750 m = &modelSettings[division];
5751 clInvar = m->invCondLikes;
5753 /* find conditional likelihood pointer */
5754 clP = m->condLikes[m->condLikeIndex[chain][p->index]];
5756 /* find base frequencies */
5757 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5759 /* find tree scaler */
5760 lnScaler = m->scalers[m->siteScalerIndex[chain]];
5762 /* find nSitesOfPat */
5763 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5765 /* find rate category index and number of gamma categories */
/* rateCat is this chain's slice (numChars entries) of m->tiIndex */
5766 rateCat = m->tiIndex + chain * m->numChars;
5767 nGammaCats = m->numGammaCats;
5772 /* loop over characters */
5773 if (m->pInvar == NULL)
5775 for (c=i=0; c<m->numChars; c++)
5777 like = (clP[A] * bs[A] + clP[C] * bs[C] + clP[G] * bs[G] + clP[T] * bs[T]);
5780 /* check against LIKE_EPSILON (values close to zero are problematic) */
5781 if (like < LIKE_EPSILON)
5783 # ifdef DEBUG_LIKELIHOOD
5784 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5786 (*lnL) = MRBFLT_NEG_MAX;
5792 # if defined (FAST_LOG)
/* FAST_LOG path: frexp splits like into a fraction f and a binary exponent
   index; the exponent part is added through the logValue table here, and
   log (likeAdjust) is added once at the end of the function.
   NOTE(review): the loop below presumably folds f into likeAdjust once per
   site -- confirm. */
5793 f = frexp (like, &index);
5795 (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
5796 for (k=0; k<(int)nSitesOfPat[c]; k++)
5799 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5806 /* has invariable category */
5807 for (c=i=0; c<m->numChars; c++)
/* two alternative site likelihoods: from the node's cond likes, or from the
   invariable-site cond likes. NOTE(review): presumably chosen by comparing the
   character's rate category with nGammaCats -- confirm. */
5811 like = (clP[A] * bs[A] + clP[C] * bs[C] + clP[G] * bs[G] + clP[T] * bs[T]);
5813 like = (clInvar[A] * bs[A] + clInvar[C] * bs[C] + clInvar[G] * bs[G] + clInvar[T] * bs[T]);
5817 /* check against LIKE_EPSILON (values close to zero are problematic) */
5818 if (like < LIKE_EPSILON)
5820 # ifdef DEBUG_LIKELIHOOD
5821 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
5823 (*lnL) = MRBFLT_NEG_MAX;
/* this branch always uses log (); there is no FAST_LOG variant here */
5829 (*lnL) += (log (like) + lnScaler[c]) * nSitesOfPat[c];
5834 # if defined (FAST_LOG)
5835 (*lnL) += log (likeAdjust);
5842 //#if defined (SSE_ENABLED)
5843 ///*------------------------------------------------------------------
5845 // | Likelihood_NUC4_GibbsGamma: 4by4 nucleotide models with rate
5846 // | variation using Gibbs sampling from gamma rate categories
5848 // -------------------------------------------------------------------*/
5849 //int Likelihood_NUC4_GibbsGamma_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5851 // int c, i, r, nGammaCats, *rateCat;
5852 // MrBFlt *bs, like;
5853 // CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE, *lnLI_SSE;
5854 // __m128 *clP, *clInvar=NULL;
5855 // __m128 m1, mA, mC, mG, mT, mFreq, mPInvar, mLike;
5858 //#if defined (FAST_LOG)
5860 // MrBFlt likeAdjust = 1.0, f;
5863 // /* find model settings and invar cond likes */
5864 // m = &modelSettings[division];
5865 // clInvar = (__m128 *)m->invCondLikes;
5866 // /* find conditional likelihood pointer */
5867 // clP = (__m128 *)m->condLikes[m->condLikeIndex[chain][p->index]];
5869 // lnL_SSE = m->lnL_SSE;
5870 // lnLI_SSE = m->lnLI_SSE;
5872 // /* find base frequencies */
5873 // bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
5875 // /* find tree scaler */
5876 // lnScaler = m->scalers[m->siteScalerIndex[chain]];
5878 // /* find nSitesOfPat */
5879 // nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
5881 // /* find rate category index and number of gamma categories */
5882 // rateCat = m->tiIndex + chain * m->numChars;
5883 // nGammaCats = m->numGammaCats;
5888 // /* calculate variable likelihood */
5889 // for (c=0; c<m->numSSEChars; c++)
5891 // mLike = _mm_mul_ps (clP[A], mA);
5892 // m1 = _mm_mul_ps (clP[C], mC);
5893 // mLike = _mm_add_ps (mLike, m1);
5894 // m1 = _mm_mul_ps (clP[G], mG);
5895 // mLike = _mm_add_ps (mLike, m1);
5896 // m1 = _mm_mul_ps (clP[T], mT);
5897 // mLike = _mm_add_ps (mLike, m1);
5900 // _mm_store_ps (lnL_SSE, mLike);
5901 // lnL_SSE += FLOATS_PER_VEC;
5904 // /* calculate invariable likelihood */
5905 // if (hasPInvar == YES)
5907 // for (c=0; c<m->numSSEChars; c++)
5909 // mLike = _mm_mul_ps (clInvar[A], mA);
5910 // m1 = _mm_mul_ps (clInvar[C], mC);
5911 // mLike = _mm_add_ps (mLike, m1);
5912 // m1 = _mm_mul_ps (clInvar[G], mG);
5913 // mLike = _mm_add_ps (mLike, m1);
5914 // m1 = _mm_mul_ps (clInvar[T], mT);
5915 // mLike = _mm_add_ps (mLike, m1);
5916 // mLike = _mm_mul_ps (mLike, mPInvar);
5918 // _mm_store_ps (lnLI_SSE, mLike);
5920 // lnLI_SSE += FLOATS_PER_VEC;
5925 // /* loop over characters */
5926 // if (m->pInvar == NULL)
5928 // for (c=i=0; c<m->numChars; c++)
5930 // like = m->lnL_SSE[c];
5931 // /* check against LIKE_EPSILON (values close to zero are problematic) */
5932 // if (like < LIKE_EPSILON)
5934 // MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30lf\n", spacer, division, c, like);
5935 // (*lnL) = MRBFLT_NEG_MAX;
5940 //#if defined (FAST_LOG)
5941 // f = frexp (like, &index);
5943 // (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
5944 // for (k=0; k<(int)nSitesOfPat[c]; k++)
5947 // (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
5954 // /* has invariable category */
5955 // for (c=i=0; c<m->numChars; c++)
5958 // if (r < nGammaCats)
5959 // like = m->lnL_SSE[c];
5961 // like = m->lnLI_SSE[c];
5963 // /* check against LIKE_EPSILON (values close to zero are problematic) */
5964 // if (like < LIKE_EPSILON)
5966 // MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30lf\n", spacer, division, c, like);
5967 // (*lnL) = MRBFLT_NEG_MAX;
5972 // (*lnL) += (log (like) + lnScaler[c]) * nSitesOfPat[c];
5977 //#if defined (FAST_LOG)
5978 // (*lnL) += log (likeAdjust);
5986 #if defined (SSE_ENABLED)
5987 /*------------------------------------------------------------------
5989 | Likelihood_NUC4_SSE: 4by4 nucleotide models with or without rate
| variation and with or without a proportion of invariable sites,
| SSE version
|
| Works in two phases: (1) vector loops over m->numSSEChars compute
| the per-character likelihoods and store them, FLOATS_PER_VEC
| floats at a time, in m->lnL_SSE (and m->lnLI_SSE for the
| invariable category); (2) a scalar loop over m->numChars reads
| those buffers and accumulates the log likelihood.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            each character adds
|                (lnScaler + log(like)) * nSitesOfPat to *lnL;
|                *lnL is set to MRBFLT_NEG_MAX when a site
|                likelihood falls below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
5992 -------------------------------------------------------------------*/
5993 int Likelihood_NUC4_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
5995 int c, k, hasPInvar;
5996 MrBFlt freq, *bs, pInvar=0.0, like, likeI;
5997 CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE, *lnLI_SSE;
5998 __m128 *clPtr, **clP, *clInvar=NULL;
5999 __m128 m1, mA, mC, mG, mT, mFreq, mPInvar=_mm_set1_ps(0.0f), mLike;
6002 # if defined (FAST_LOG)
6004 MrBFlt likeAdjust = 1.0, f;
6007 /* find model settings and pInvar, invar cond likes */
6008 m = &modelSettings[division];
6009 if (m->pInvar == NULL)
6016 pInvar = *(GetParamVals (m->pInvar, chain, state[chain]));
6017 mPInvar = _mm_set1_ps ((CLFlt)(pInvar));
6018 clInvar = (__m128 *) (m->invCondLikes);
6021 /* find conditional likelihood pointers */
/* the cond likes are accessed as __m128 vectors; one rate category spans
   numSSEChars * numModelStates vectors */
6022 clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
6024 for (k=0; k<m->numGammaCats; k++)
6027 clPtr += m->numSSEChars * m->numModelStates;
6029 lnL_SSE = m->lnL_SSE;
6030 lnLI_SSE = m->lnLI_SSE;
6032 /* find base frequencies */
6033 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
/* broadcast each base frequency (narrowed to CLFlt) to all lanes */
6034 mA = _mm_set1_ps ((CLFlt)(bs[A]));
6035 mC = _mm_set1_ps ((CLFlt)(bs[C]));
6036 mG = _mm_set1_ps ((CLFlt)(bs[G]));
6037 mT = _mm_set1_ps ((CLFlt)(bs[T]));
6039 /* find category frequencies */
6040 if (hasPInvar == NO)
6041 freq = 1.0 / m->numGammaCats;
6043 freq = (1.0 - pInvar) / m->numGammaCats;
6044 mFreq = _mm_set1_ps ((CLFlt)(freq));
6046 /* find tree scaler */
6047 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6049 /* find nSitesOfPat */
6050 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6055 /* calculate variable likelihood */
6056 for (c=0; c<m->numSSEChars; c++)
6058 mLike = _mm_setzero_ps ();
6059 for (k=0; k<m->numGammaCats; k++)
6061 m1 = _mm_mul_ps (clP[k][A], mA);
6062 mLike = _mm_add_ps (mLike, m1);
6063 m1 = _mm_mul_ps (clP[k][C], mC);
6064 mLike = _mm_add_ps (mLike, m1);
6065 m1 = _mm_mul_ps (clP[k][G], mG);
6066 mLike = _mm_add_ps (mLike, m1);
6067 m1 = _mm_mul_ps (clP[k][T], mT);
6068 mLike = _mm_add_ps (mLike, m1);
/* the category frequency is applied once to the sum over all categories */
6071 mLike = _mm_mul_ps (mLike, mFreq);
/* NOTE(review): _mm_store_ps needs a 16-byte aligned destination; m->lnL_SSE
   is assumed to be allocated accordingly -- confirm */
6072 _mm_store_ps (lnL_SSE, mLike);
6073 lnL_SSE += FLOATS_PER_VEC;
6076 /* calculate invariable likelihood */
6077 if (hasPInvar == YES)
6079 for (c=0; c<m->numSSEChars; c++)
6081 mLike = _mm_mul_ps (clInvar[A], mA);
6082 m1 = _mm_mul_ps (clInvar[C], mC);
6083 mLike = _mm_add_ps (mLike, m1);
6084 m1 = _mm_mul_ps (clInvar[G], mG);
6085 mLike = _mm_add_ps (mLike, m1);
6086 m1 = _mm_mul_ps (clInvar[T], mT);
6087 mLike = _mm_add_ps (mLike, m1);
6088 mLike = _mm_mul_ps (mLike, mPInvar);
6090 _mm_store_ps (lnLI_SSE, mLike);
6092 lnLI_SSE += FLOATS_PER_VEC;
6096 /* accumulate results */
6097 if (hasPInvar == NO)
6099 for (c=0; c<m->numChars; c++)
/* read back through m->lnL_SSE: the local lnL_SSE pointer was advanced above */
6101 like = m->lnL_SSE[c];
6102 /* check against LIKE_EPSILON (values close to zero are problematic) */
6103 if (like < LIKE_EPSILON)
6105 # ifdef DEBUG_LIKELIHOOD
6106 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6108 (*lnL) = MRBFLT_NEG_MAX;
6114 # if defined (FAST_LOG)
/* FAST_LOG path: frexp splits like into a fraction f and a binary exponent
   index; the exponent part is added through the logValue table here, and
   log (likeAdjust) is added once at the end of the function.
   NOTE(review): the loop below presumably folds f into likeAdjust once per
   site -- confirm. */
6115 f = frexp (like, &index);
6117 (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
6118 for (k=0; k<(int)nSitesOfPat[c]; k++)
6121 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6128 /* has invariable category */
6129 for (c=0; c<m->numChars; c++)
6131 like = m->lnL_SSE[c];
6132 likeI = m->lnLI_SSE[c];
6133 if (lnScaler[c] < -200)
6135 /* we are not going to be able to exponentiate the scaling factor */
6138 /* forget about like; it is going to be insignificant compared to likeI */
6143 /* treat likeI as if 0.0, that is, ignore it completely */
/* dividing likeI by exp (lnScaler[c]) puts it on the same scale as like,
   because lnScaler[c] is added back in log space when *lnL is updated below */
6147 like = like + (likeI / exp (lnScaler[c]));
6149 /* check against LIKE_EPSILON (values close to zero are problematic) */
6150 if (like < LIKE_EPSILON)
6152 # ifdef DEBUG_LIKELIHOOD
6153 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6155 (*lnL) = MRBFLT_NEG_MAX;
6161 # if defined (FAST_LOG)
6162 f = frexp (like, &index);
6164 (*lnL) += (lnScaler[c] + logValue[index]) * nSitesOfPat[c];
6165 for (k=0; k<(int)nSitesOfPat[c]; k++)
6168 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6174 # if defined (FAST_LOG)
6175 (*lnL) += log (likeAdjust);
6183 /*------------------------------------------------------------------
6185 | Likelihood_NY98: Codon model with three selection categories,
6186 | after Nielsen and Yang (1998).
|
| The site likelihood is the sum over the m->numOmegaCats categories
| of (sum over states of cond like * codon frequency), each
| weighted by omegaCatFreq[k].
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            reset to 0.0, then each character adds
|                (lnScaler + log(like)) * nSitesOfPat; set to
|                MRBFLT_NEG_MAX when a site likelihood falls
|                below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
6188 -------------------------------------------------------------------*/
6189 int Likelihood_NY98 (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6191 int c, j, k, nStates;
6192 MrBFlt catLike, like, *bs, *omegaCatFreq;
6193 CLFlt **clP,*clPtr, *lnScaler, *nSitesOfPat;
6196 m = &modelSettings[division];
6198 /* number of states */
6199 nStates = m->numModelStates;
6201 /* find conditional likelihood pointers */
6202 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* the cond likes of one omega category span numChars * numModelStates values */
6204 for (k=0; k<m->numOmegaCats; k++)
6207 clPtr += m->numChars * m->numModelStates;
6210 /* find codon frequencies */
6211 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6213 /* find category frequencies */
/* the category weights are read from the sub-values of the omega parameter */
6214 omegaCatFreq = GetParamSubVals (m->omega, chain, state[chain]);
6216 /* find site scaler */
6217 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6219 /* find nSitesOfPat */
6220 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6222 *lnL = 0.0; /* reset lnL */
/* the first numDummyChars characters are skipped and add nothing to *lnL */
6224 for (c=m->numDummyChars; c<m->numChars; c++)
6227 for (k=0; k<m->numOmegaCats; k++)
6230 for (j=0; j<nStates; j++)
6231 catLike += clP[k][j] * bs[j];
6232 like += catLike * omegaCatFreq[k];
6235 /* check against LIKE_EPSILON (values close to zero are problematic) */
6236 if (like < LIKE_EPSILON)
6238 # ifdef DEBUG_LIKELIHOOD
6239 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6241 (*lnL) = MRBFLT_NEG_MAX;
6247 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6255 #if defined (SSE_ENABLED)
6256 /*------------------------------------------------------------------
6258 | Likelihood_NY98_SSE: Codon model with three selection categories,
6259 | after Nielsen and Yang (1998).
|
| SSE version. A vector loop over m->numSSEChars stores the
| per-character likelihoods (summed over the m->numOmegaCats
| categories, each weighted by omegaCatFreq[k]) in m->lnL_SSE; a
| scalar loop then accumulates the log likelihood from that buffer.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            reset to 0.0, then each character adds
|                (lnScaler + log(like)) * nSitesOfPat; set to
|                MRBFLT_NEG_MAX when a site likelihood falls
|                below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
6261 -------------------------------------------------------------------*/
6262 int Likelihood_NY98_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6264 int c, j, k, nStates;
6265 MrBFlt like, *bs, *omegaCatFreq;
6266 CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE;
6267 __m128 *clPtr, **clP;
6268 __m128 m1, mCatLike, mLike;
6271 m = &modelSettings[division];
6273 /* number of states */
6274 nStates = m->numModelStates;
6276 /* find conditional likelihood pointers */
/* the cond likes are accessed as __m128 vectors; one omega category spans
   numSSEChars * nStates vectors */
6277 clPtr = (__m128 *) m->condLikes[m->condLikeIndex[chain][p->index]];
6279 for (k=0; k<m->numOmegaCats; k++)
6282 clPtr += m->numSSEChars * nStates;
6285 /* find codon frequencies */
6286 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6288 /* find category frequencies */
6289 omegaCatFreq = GetParamSubVals (m->omega, chain, state[chain]);
6291 /* find site scaler */
6292 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6294 /* find nSitesOfPat */
6295 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6297 *lnL = 0.0; /* reset lnL */
6299 lnL_SSE = m->lnL_SSE;
/* vector phase: runs over all numSSEChars vectors, starting at 0 */
6300 for (c=0; c<m->numSSEChars; c++)
6302 mLike = _mm_setzero_ps ();
6303 for (k=0; k<m->numOmegaCats; k++)
6305 mCatLike = _mm_setzero_ps ();
6306 for (j=0; j<nStates; j++)
/* the frequency is narrowed to CLFlt and broadcast to all lanes on each use */
6308 m1 = _mm_mul_ps (clP[k][j], _mm_set1_ps ((CLFlt)bs[j]));
6309 mCatLike = _mm_add_ps (mCatLike, m1);
6311 m1 = _mm_mul_ps (mCatLike, _mm_set1_ps ((CLFlt)omegaCatFreq[k]));
6312 mLike = _mm_add_ps (mLike, m1);
/* NOTE(review): _mm_store_ps needs a 16-byte aligned destination; m->lnL_SSE
   is assumed to be allocated accordingly -- confirm */
6315 _mm_store_ps (lnL_SSE, mLike);
6316 lnL_SSE += FLOATS_PER_VEC;
/* scalar phase: the first numDummyChars characters are skipped; the buffer is
   read through m->lnL_SSE because the local pointer was advanced above */
6318 for (c=m->numDummyChars; c<m->numChars; c++)
6320 like = m->lnL_SSE[c];
6321 /* check against LIKE_EPSILON (values close to zero are problematic) */
6322 if (like < LIKE_EPSILON)
6324 # ifdef DEBUG_LIKELIHOOD
6325 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6327 (*lnL) = MRBFLT_NEG_MAX;
6333 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6342 /*------------------------------------------------------------------
6344 | Likelihood_Res: restriction site model with or without rate
| variation (loop over m->numGammaCats categories)
|
| Two-state model (states 0 and 1). The first m->numDummyChars
| characters are used only to compute pUnobserved; the remaining
| characters contribute to the log likelihood, which is finally
| corrected by subtracting log(pObserved) once per uncompressed
| character.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            reset to 0.0, then accumulated; set to
|                MRBFLT_NEG_MAX when pObserved or a site likelihood
|                falls below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
6347 -------------------------------------------------------------------*/
6348 int Likelihood_Res (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6351 MrBFlt *bs, freq, like, pUnobserved, pObserved;
6352 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat;
6356 m = &modelSettings[division];
6358 /* find conditional likelihood pointer */
6359 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
/* the cond likes of one rate category span numChars * numModelStates values */
6361 for (k=0; k<m->numGammaCats; k++)
6364 clPtr += m->numChars * m->numModelStates;
6367 /* find base frequencies */
6368 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
6370 /* find category frequencies */
6371 freq = 1.0 / m->numGammaCats;
6373 /* find site scaler */
6374 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6376 /* find nSitesOfPat */
6377 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6379 *lnL = 0.0; /* reset lnL */
/* dummy characters: their likelihoods are summed into pUnobserved */
6382 for (c=0; c<m->numDummyChars; c++)
6385 for (k=0; k<m->numGammaCats; k++)
6387 like += (clP[k][0]*bs[0] + clP[k][1]*bs[1]) * freq;
/* exp (lnScaler[c]) undoes the scaling so that the probabilities can be summed */
6390 pUnobserved += like * exp(lnScaler[c]);
6393 pObserved = 1.0 - pUnobserved;
6394 if (pObserved < LIKE_EPSILON)
6396 # ifdef DEBUG_LIKELIHOOD
6397 MrBayesPrint ("%s WARNING: p(Observed) < LIKE_EPSILON - for division %d p(Observed) = %1.30le\n", spacer, division+1, pObserved);
6399 (*lnL) = MRBFLT_NEG_MAX;
/* real characters start after the dummy characters */
6404 for (c=m->numDummyChars; c<m->numChars; c++)
6407 for (k=0; k<m->numGammaCats; k++)
6409 like += (clP[k][0]*bs[0] + clP[k][1]*bs[1]) * freq;
6412 /* check against LIKE_EPSILON (values close to zero are problematic) */
6413 if (like < LIKE_EPSILON)
6415 # ifdef DEBUG_LIKELIHOOD
6416 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6418 (*lnL) = MRBFLT_NEG_MAX;
6424 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6428 /* correct for absent characters */
/* conditions on observability: log(pObserved) is subtracted once for each of
   the m->numUncompressedChars characters */
6429 (*lnL) -= log(pObserved) * (m->numUncompressedChars);
6435 #if defined (SSE_ENABLED)
6436 /*------------------------------------------------------------------
6438 | Likelihood_Res_SSE: restriction site model with or without rate
| variation (loop over m->numGammaCats categories), SSE version
|
| Two-state model (states 0 and 1). A vector loop over
| m->numSSEChars stores the per-character likelihoods in
| m->lnL_SSE. The first m->numDummyChars entries are then used
| only to compute pUnobserved; the remaining characters contribute
| to the log likelihood, which is finally corrected by subtracting
| log(pObserved) once per uncompressed character.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            accumulated log likelihood; set to MRBFLT_NEG_MAX
|                when pObserved or a site likelihood falls below
|                LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
6441 -------------------------------------------------------------------*/
6442 int Likelihood_Res_SSE (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6445 MrBFlt freq, *bs, like, pUnobserved, pObserved;
6446 CLFlt *lnScaler, *nSitesOfPat, *lnL_SSE;
6447 __m128 *clPtr, **clP;
6448 __m128 m1, mA, mB, mFreq, mLike;
6451 /* find model settings */
6452 m = &modelSettings[division];
6454 /* find conditional likelihood pointers */
/* the cond likes are accessed as __m128 vectors; one rate category spans
   numSSEChars * numModelStates vectors */
6455 clPtr = (__m128 *) (m->condLikes[m->condLikeIndex[chain][p->index]]);
6457 for (k=0; k<m->numGammaCats; k++)
6460 clPtr += m->numSSEChars * m->numModelStates;
6462 lnL_SSE = m->lnL_SSE;
6464 /* find base frequencies */
6465 bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
/* broadcast the two state frequencies (narrowed to CLFlt) to all lanes */
6466 mA = _mm_set1_ps ((CLFlt)(bs[0]));
6467 mB = _mm_set1_ps ((CLFlt)(bs[1]));
/* equal weight per gamma category */
6469 freq = 1.0 / m->numGammaCats;
6470 mFreq = _mm_set1_ps ((CLFlt)(freq));
6472 /* find tree scaler */
6473 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6475 /* find nSitesOfPat */
6476 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6481 /* calculate variable likelihood */
6482 for (c=0; c<m->numSSEChars; c++)
6484 mLike = _mm_setzero_ps ();
6485 for (k=0; k<m->numGammaCats; k++)
6487 m1 = _mm_mul_ps (clP[k][0], mA);
6488 mLike = _mm_add_ps (mLike, m1);
6489 m1 = _mm_mul_ps (clP[k][1], mB);
6490 mLike = _mm_add_ps (mLike, m1);
/* the category frequency is applied once to the sum over all categories */
6493 mLike = _mm_mul_ps (mLike, mFreq);
/* NOTE(review): _mm_store_ps needs a 16-byte aligned destination; m->lnL_SSE
   is assumed to be allocated accordingly -- confirm */
6494 _mm_store_ps (lnL_SSE, mLike);
6495 lnL_SSE += FLOATS_PER_VEC;
/* dummy characters: their likelihoods are summed into pUnobserved;
   exp (lnScaler[c]) undoes the scaling so that the probabilities can be summed */
6499 for (c=0; c<m->numDummyChars; c++)
6501 like = m->lnL_SSE[c];
6502 pUnobserved += like * exp(lnScaler[c]);
6505 pObserved = 1.0 - pUnobserved;
6506 if (pObserved < LIKE_EPSILON)
6508 # ifdef DEBUG_LIKELIHOOD
6509 MrBayesPrint ("%s WARNING: p(Observed) < LIKE_EPSILON - for division %d p(Observed) = %1.30le\n", spacer, division+1, pObserved);
6511 (*lnL) = MRBFLT_NEG_MAX;
/* real characters start after the dummy characters */
6516 for (c=m->numDummyChars; c<m->numChars; c++)
6518 like = m->lnL_SSE[c];
6519 /* check against LIKE_EPSILON (values close to zero are problematic) */
6520 if (like < LIKE_EPSILON)
6522 # ifdef DEBUG_LIKELIHOOD
6523 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6525 (*lnL) = MRBFLT_NEG_MAX;
6531 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6535 /* correct for absent characters */
/* conditions on observability: log(pObserved) is subtracted once for each of
   the m->numUncompressedChars characters */
6536 (*lnL) -= log(pObserved) * (m->numUncompressedChars);
6543 /*------------------------------------------------------------------
6545 | Likelihood_Std: variable states model with or without rate
| variation (loop over m->numGammaCats categories)
|
| Each character c has its own number of states (m->nStates[c]) and
| its own frequency vector (bsBase + m->bsIndex[c]). Two code paths
| exist: one for m->numBetaCats == 1 and one that additionally
| loops over beta categories. In both, the first m->numDummyChars
| characters only feed pUnobserved, and the final log likelihood is
| corrected by subtracting log(pObserved) once per uncompressed
| character.
|
| p              node whose conditional likelihoods are read
| division       index into modelSettings
| chain          chain whose cond likes, scalers and parameter
|                values are used
| lnL            reset to 0.0, then accumulated; set to
|                MRBFLT_NEG_MAX when a site likelihood falls
|                below LIKE_EPSILON
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
6548 -------------------------------------------------------------------*/
6549 int Likelihood_Std (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6551 int b, c, j, k, nBetaCats, nGammaCats, nStates, numReps;
6552 MrBFlt catLike, catFreq, gammaFreq, like, *bs, *bsBase,
6553 pUnobserved, pObserved;
6554 CLFlt *clPtr, **clP, *lnScaler, *nSitesOfPat;
6557 m = &modelSettings[division];
/* numReps counts cond like entries over all characters: two-state characters
   add numBetaCats * 2; the nStates[c] term presumably applies to all other
   characters -- confirm */
6560 for (c=0; c<m->numChars; c++)
6562 if (m->nStates[c] == 2)
6563 numReps += m->numBetaCats * 2;
6565 numReps += m->nStates[c];
6567 /* find conditional likelihood pointers */
6568 clPtr = m->condLikes[m->condLikeIndex[chain][p->index]];
6570 for (k=0; k<m->numGammaCats; k++)
6576 /* find base frequencies */
6577 bsBase = GetParamStdStateFreqs (m->stateFreq, chain, state[chain]);
6579 /* find gamma category number and frequencies */
6580 nGammaCats = m->numGammaCats;
6581 gammaFreq = 1.0 / nGammaCats;
6583 /* find site scaler */
6584 lnScaler = m->scalers[m->siteScalerIndex[chain]];
6586 /* find nSitesOfPat */
6587 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6589 *lnL = 0.0; /* reset lnL */
/* path 1: a single beta category, so the category weight is just gammaFreq */
6591 if (m->numBetaCats == 1)
6594 catFreq = gammaFreq;
/* dummy characters: their likelihoods are summed into pUnobserved */
6595 for (c=j=0; c<m->numDummyChars; c++)
6598 nStates = m->nStates[c];
/* frequencies of this character start at offset bsIndex[c] in bsBase */
6599 bs = bsBase + m->bsIndex[c];
6600 for (k=0; k<nGammaCats; k++)
6603 for (j=0; j<nStates; j++)
6604 catLike += clP[k][j] * bs[j];
6605 like += catLike * catFreq;
/* exp (lnScaler[c]) undoes the scaling so that the probabilities can be summed */
6608 pUnobserved += like * exp(lnScaler[c]);
6611 pObserved = 1.0 - pUnobserved;
/* pObserved is clamped to LIKE_EPSILON here, so that log(pObserved) in the
   final correction stays finite */
6612 if (pObserved < LIKE_EPSILON)
6613 pObserved = LIKE_EPSILON;
/* real characters start after the dummy characters */
6615 for (c=m->numDummyChars; c<m->numChars; c++)
6618 nStates = m->nStates[c];
6619 bs = bsBase + m->bsIndex[c];
6621 for (k=0; k<nGammaCats; k++)
6624 for (j=0; j<nStates; j++)
6625 catLike += clP[k][j] * bs[j];
6626 like += catLike * catFreq;
6629 /* check against LIKE_EPSILON (values close to zero are problematic) */
6630 if (like < LIKE_EPSILON)
6632 # ifdef DEBUG_LIKELIHOOD
6633 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6635 (*lnL) = MRBFLT_NEG_MAX;
6641 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
/* path 2: several beta categories; same two loops as above with an extra
   loop over the beta categories */
6648 for (c=j=0; c<m->numDummyChars; c++)
6651 nStates = m->nStates[c];
6652 bs = bsBase + m->bsIndex[c];
/* NOTE(review): the two catFreq assignments below are alternatives; presumably
   the beta categories apply only to two-state characters and all other
   characters use catFreq = gammaFreq -- confirm */
6655 nBetaCats = m->numBetaCats;
6656 catFreq = gammaFreq / nBetaCats;
6661 catFreq = gammaFreq;
6663 for (b=0; b<nBetaCats; b++)
6665 for (k=0; k<nGammaCats; k++)
6668 for (j=0; j<nStates; j++)
6669 catLike += clP[k][j] * bs[j];
6670 like += catLike * catFreq;
6675 pUnobserved += like * exp(lnScaler[c]);
6678 pObserved = 1.0 - pUnobserved;
6679 if (pObserved < LIKE_EPSILON)
6680 pObserved = LIKE_EPSILON;
6682 for (c=m->numDummyChars; c<m->numChars; c++)
6685 nStates = m->nStates[c];
6686 bs = bsBase + m->bsIndex[c];
6689 nBetaCats = m->numBetaCats;
6690 catFreq = gammaFreq / nBetaCats;
6695 catFreq = gammaFreq;
6697 for (b=0; b<nBetaCats; b++)
6699 for (k=0; k<nGammaCats; k++)
6702 for (j=0; j<nStates; j++)
6703 catLike += clP[k][j] * bs[j];
6704 like += catLike * catFreq;
6709 /* check against LIKE_EPSILON (values close to zero are problematic) */
6710 if (like < LIKE_EPSILON)
6712 # ifdef DEBUG_LIKELIHOOD
6713 MrBayesPrint ("%s WARNING: In LIKE_EPSILON - for division %d char %d has like = %1.30le\n", spacer, division+1, c+1, like);
6715 (*lnL) = MRBFLT_NEG_MAX;
6721 (*lnL) += (lnScaler[c] + log(like)) * nSitesOfPat[c];
6726 /* correct for absent characters */
/* conditions on observability: log(pObserved) is subtracted once for each of
   the m->numUncompressedChars characters */
6727 (*lnL) -= log(pObserved) * (m->numUncompressedChars);
6733 /*------------------------------------------------------------------
6735 | Likelihood_Pars: likelihood under the Tuffley and Steel (1997)
6736 | model for characters with constant number of states. The idea
| is described in:
|
6739 | Tuffley, C., and M. Steel. 1997. Links between maximum likelihood
6740 | and maximum parsimony under a simple model of site substitution.
6741 | Bull. Math. Bio. 59:581-607.
|
6743 | The likelihood under the Tuffley and Steel (1997) model is:
|
|    L = k^[-(T + n)]
|
6747 | where L is the likelihood
6748 | k is the number of character states
6749 | T is the parsimony tree length
6750 | n is the number of characters
|
| (the code computes ln L = -(T + n) * ln k, with n taken as the sum
| of the pattern weights nSitesOfPat)
|
6752 | The parsimony calculator does not use character packing; this is
6753 | to enable reweighting of characters
|
6755 | Note that this is an empirical Bayes approach in that it uses the
6756 | maximum likelihood branch length.
|
| p              reassigned as the traversal cursor inside the loops
|                below
| division       index into modelSettings
| chain          chain whose parsimony sets, node lengths and tree
|                are used
| lnL            receives -(treeLength + nParsChars) * log(nStates)
| whichSitePats  selects which block of numCompressedChars pattern
|                weights in numSitesOfPat is used
|
| Side effects: flips the cond like space of every updated interior
| node, rewrites its stored node length, and stores the updated
| tree length in m->parsTreeLength for this chain and state.
|
| NOTE(review): return convention (presumably NO_ERROR/ERROR) to be
| confirmed.
6758 -------------------------------------------------------------------*/
6759 int Likelihood_Pars (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6762 BitsLong done, *pL, *pR, *pP, *pA, *oldpP, x;
6763 CLFlt nParsChars, treeLength;
6764 CLFlt length, *nSitesOfPat, *newNodeLength, oldNodeLength;
6768 /* Find model settings */
6769 m = &modelSettings[division];
6772 t = GetTree(m->brlens,chain,state[chain]);
6774 /* Get parsimony tree length */
/* starts from the stored length for this chain/state and is updated
   incrementally below, node by node */
6775 treeLength = (CLFlt) m->parsTreeLength[2 * chain + state[chain]];
6777 /* Get number of states */
6778 nStates = m->numStates;
6780 /* Get number of sites of pat */
6781 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6783 /* Mark the nodes that can be stop nodes */
6784 /* (there must not be any touched side nodes below them) */
/* interior nodes are visited in reverse downpass order */
6787 for (i=t->nIntNodes-1; i>=0; i--)
6789 p = t->intDownPass[i];
6791 if (p->upDateCl == YES && p->anc->marked == YES)
6793 if (p->left->upDateCl == NO || p->right->upDateCl == NO)
6798 /* Now make downpass node by node */
6799 for (i=0; i<t->nIntNodes; i++)
6801 p = t->intDownPass[i];
6803 /* continue if no work needs to be done */
6804 if (p->upDateCl == NO)
/* after the flip, condLikeIndex refers to the space to be written and
   condLikeScratchIndex to the previous values (used as "old" below) */
6808 FlipCondLikeSpace(m, chain, p->index);
6810 /* find parsimony sets for the node and its environment */
6811 pL = m->parsSets[m->condLikeIndex[chain][p->left->index ]];
6812 pR = m->parsSets[m->condLikeIndex[chain][p->right->index]];
6813 oldpP = m->parsSets[m->condLikeScratchIndex[p->index ]];
6814 pP = m->parsSets[m->condLikeIndex[chain][p->index ]];
6816 /* find old and new node lengths */
6817 oldNodeLength = m->parsNodeLens[m->condLikeScratchIndex[p->index]];
6818 newNodeLength = &m->parsNodeLens[m->condLikeIndex[chain][p->index]];
/* special case: unrooted tree and p is the node just above the root node, so
   the ancestor's state set (pA) also takes part in the comparison */
6820 if (t->isRooted == NO && p->anc->anc == NULL)
6822 pA = m->parsSets[m->condLikeIndex[chain][p->anc->index]];
6824 for (c=0; c<m->numChars; c++)
/* every extra step is weighted by the number of sites with this pattern */
6830 length += nSitesOfPat[c];
/* no state shared with the ancestor's set: one more step */
6832 if ((x & pA[c]) == 0)
6833 length += nSitesOfPat[c];
/* replace this node's old contribution to the tree length by the new one */
6836 treeLength += (length - oldNodeLength);
6837 newNodeLength[0] = length;
6843 for (c=0; c<m->numChars; c++)
6849 length += nSitesOfPat[c];
/* done stays 0 only while x equals the previous set stored in oldpP
   (x presumably holds the newly computed state set -- confirm) */
6852 done |= (x^oldpP[c]);
6854 treeLength += (length - oldNodeLength);
6855 newNodeLength[0] = length;
/* a marked (stop) node with unchanged state sets; presumably the downpass
   can end here -- confirm */
6856 if (p->marked == YES && done == 0)
6861 /* Count number of characters in the partition. It is calculated
6862 on the fly because this number is going to differ for
6863 different chains if character reweighting is used. */
6864 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6866 for (c=0; c<m->numChars; c++)
6867 nParsChars += nSitesOfPat[c];
6869 /* Calculate likelihood from parsimony tree length */
6870 *lnL = - ((treeLength + nParsChars) * log (nStates));
6872 /* Store current parsimony tree length */
6873 m->parsTreeLength[2 * chain + state[chain]] = treeLength;
/* Likelihood_ParsCodon: placeholder with the same parameter list as
   Likelihood_Pars. The only visible action is printing a message that the
   codon parsimony calculator is not yet implemented.
   NOTE(review): the return statement is not among the lines shown here. */
6880 int Likelihood_ParsCodon (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6892 MrBayesPrint ("%s Parsimony calculator for codons not yet implemented\n", spacer);
6899 /*------------------------------------------------------------------
6901 | Likelihood_ParsStd: likelihood under the Tuffley and Steel (1997)
6902 | model for characters with constant number of states. The idea
6905 | Tuffley, C., and M. Steel. 1997. Links between maximum likelihood
6906 | and maximum parsimony under a simple model of site substitution.
6907 | Bull. Math. Bio. 59:581-607.
6909 | The likelihood under the Tuffley and Steel (1997) model is:
6913 | where L is the likelihood
6914 | k is the number of character states
6915 | T is the parsimony tree length
6916 | n is the number of characters
6918 | The parsimony calculator does not use character packing; this is
6919 | to enable reweighting of characters
6921 | Note that this is an empirical Bayes approach in that it uses the
6922 | maximum likelihood branch length.
6924 | This variant of the calculator assumes that the number of states
6925 | is variable. It does not take state order into account.
|
| Parameters as used by the visible code:
|    p             reassigned from t->intDownPass[] inside the loop
|    division      index into modelSettings[]
|    chain         chain index (selects tree and parsimony sets)
|    lnL           output; reduced by
|                  (treeLength[c] + nSitesOfPat[c]) * log (nStates[c])
|                  for every character c
|    whichSitePats selects the block of numSitesOfPat weights
6927 -------------------------------------------------------------------*/
6928 int Likelihood_ParsStd (TreeNode *p, int division, int chain, MrBFlt *lnL, int whichSitePats)
6931 BitsLong *pL, *pR, *pP, *pA, x;
6937 /* Find model settings */
6938 m = &modelSettings[division];
6941 t = GetTree(m->brlens,chain,state[chain]);
6943 /* Allocate space for parsimony tree length */
/* one accumulator per character, zero-filled by the calloc-style call.
   NOTE(review): no check of the returned pointer is visible in this excerpt */
6944 treeLength = (CLFlt *) SafeCalloc (m->numChars, sizeof (CLFlt));
6946 /* Get number of states */
/* here nStates is indexed per character (see the log (nStates[c]) below) */
6947 nStates = m->nStates;
6949 /* Get number of sites of pat */
6950 nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
6952 /* Make downpass node by node; do not skip any nodes */
6953 for (i=0; i<t->nIntNodes; i++)
6955 p = t->intDownPass[i];
6958 FlipCondLikeSpace(m, chain, p->index);
6960 /* find parsimony sets for the node and its environment */
6961 pL = m->parsSets[m->condLikeIndex[chain][p->left->index ]];
6962 pR = m->parsSets[m->condLikeIndex[chain][p->right->index]];
6963 pP = m->parsSets[m->condLikeIndex[chain][p->index ]];
/* lowest interior node of an unrooted tree: the ancestor's set is compared too */
6965 if (t->isRooted == NO && p->anc->anc == NULL)
6967 pA = m->parsSets[m->condLikeIndex[chain][p->anc->index]];
6968 for (c=0; c<m->numChars; c++)
6974 treeLength[c] += nSitesOfPat[c];
6976 if ((x & pA[c]) == 0)
6977 treeLength[c] += nSitesOfPat[c];
6983 for (c=0; c<m->numChars; c++)
6989 treeLength[c] += nSitesOfPat[c];
6996 /* Calculate the likelihood one character at a time */
6998 for (c=0; c<m->numChars; c++)
7000 *lnL -= ((treeLength[c] + nSitesOfPat[c]) * log (nStates[c]));
7003 /* Free space for parsimony character states */
/* NOTE(review): the release of treeLength and the return statement are not
   among the lines shown here */
7010 /*-----------------------------------------------------------------
7012 | LaunchLogLikeForDivision: calculate the log likelihood of the
7013 | new state of the chain for a single division
|
|    chain   chain index
|    d       division index into modelSettings[]
|    lnL     output; passed on to the BEAGLE launcher or to m->Likelihood,
|            and set to MRBFLT_NEG_MAX if UpDateCijk reports ERROR
|
| Visible sequence: refresh Cijk if flagged, take the BEAGLE path if
| enabled for the division, flip and reset/copy the site scalers, walk
| the interior nodes updating transition probabilities, conditional
| likelihoods and scalers, then call m->Likelihood on tree->root->left.
7015 -----------------------------------------------------------------*/
7016 void LaunchLogLikeForDivision(int chain, int d, MrBFlt* lnL)
7022 # if defined (TIMING_ANALIZ)
7023 clock_t CPUTimeStart;
7026 m = &modelSettings[d];
7027 tree = GetTree(m->brlens, chain, state[chain]);
7029 if (m->upDateCijk == YES)
7031 if (UpDateCijk(d, chain)== ERROR)
7033 (*lnL) = MRBFLT_NEG_MAX; /* effectively abort the move */
7039 # if defined (BEAGLE_ENABLED)
7040 if (m->useBeagle == YES)
7042 LaunchBEAGLELogLikeForDivision(chain, d, m, tree, lnL);
7047 /* Flip and copy or reset site scalers */
7048 FlipSiteScalerSpace(m, chain);
7049 if (m->upDateAll == YES)
7050 ResetSiteScalers(m, chain);
7052 CopySiteScalers(m, chain);
/* NOTE(review): the extent of the region guarded by this test is not visible
   here; the per-node pass below follows it directly */
7054 if (m->parsModelId == NO)
7056 for (i=0; i<tree->nIntNodes; i++)
7058 p = tree->intDownPass[i];
7060 if (p->left->upDateTi == YES)
7062 /* shift state of ti probs for node */
7063 FlipTiProbsSpace (m, chain, p->left->index);
7064 m->TiProbs (p->left, d, chain);
7067 if (p->right->upDateTi == YES)
7069 /* shift state of ti probs for node */
7070 FlipTiProbsSpace (m, chain, p->right->index);
7071 m->TiProbs (p->right, d, chain);
7074 if (tree->isRooted == NO)
7076 if (p->anc->anc == NULL /* && p->upDateTi == YES */)
7078 /* shift state of ti probs for node */
7079 FlipTiProbsSpace (m, chain, p->index);
7080 m->TiProbs (p, d, chain);
7084 if (p->upDateCl == YES)
7086 if (tree->isRooted == NO)
7088 if (p->anc->anc == NULL)
7090 TIME(m->CondLikeRoot (p, d, chain),CPUCondLikeRoot);
7094 TIME(m->CondLikeDown (p, d, chain),CPUCondLikeDown);
7099 TIME(m->CondLikeDown (p, d, chain),CPUCondLikeDown);
/* a scaler recorded earlier for this node is taken out of the site scalers,
   except when all site scalers were reset above (upDateAll == YES) */
7102 if (m->scalersSet[chain][p->index] == YES && m->upDateAll == NO)
7104 # if defined (SSE_ENABLED)
7105 if (m->useSSE == YES)
7107 TIME(RemoveNodeScalers_SSE (p, d, chain),CPUScalersRemove);
7111 TIME(RemoveNodeScalers (p, d, chain),CPUScalersRemove);
7114 TIME(RemoveNodeScalers (p, d, chain),CPUScalersRemove);
7117 FlipNodeScalerSpace (m, chain, p->index);
7118 m->scalersSet[chain][p->index] = NO;
7120 if (p->scalerNode == YES)
7122 TIME(m->CondLikeScaler (p, d, chain),CPUScalers);
/* the last argument is the whichSitePats selector of the Likelihood_* functions */
7127 TIME(m->Likelihood (tree->root->left, d, chain, lnL, (chainId[chain] % chainParams.numChains)),CPULilklihood);
7132 /*----------------------------------------------------------------
7134 | RemoveNodeScalers: Remove node scalers
|
| Subtracts the scaler values stored for node p from the site scalers
| of the chain, one entry per character (m->numChars entries).
|
|    p         node whose scalers are removed; the assert requires
|              m->scalersSet[chain][p->index] == YES
|    division  index into modelSettings[]
|    chain     chain index
|
| NOTE(review): the return statement is not among the lines shown here.
7136 -----------------------------------------------------------------*/
7137 int RemoveNodeScalers (TreeNode *p, int division, int chain)
7140 CLFlt *scP, *lnScaler;
7143 m = &modelSettings[division];
7144 assert (m->scalersSet[chain][p->index] == YES);
/* scalers stored for this node */
7147 scP = m->scalers[m->nodeScalerIndex[chain][p->index]];
7149 /* find site scalers */
7150 lnScaler = m->scalers[m->siteScalerIndex[chain]];
7152 /* remove scalers */
7153 for (c=0; c<m->numChars; c++)
7154 lnScaler[c] -= scP[c];
7160 #if defined (SSE_ENABLED)
7161 /*----------------------------------------------------------------
7163 | RemoveNodeScalers_SSE: Remove node scalers, SSE code
|
| Same subtraction as RemoveNodeScalers, but the two scaler arrays are
| accessed as __m128 vectors and the loop runs over m->numSSEChars
| vector elements, using _mm_sub_ps for the packed single-precision
| subtraction.
|
|    p         node whose scalers are removed; the assert requires
|              m->scalersSet[chain][p->index] == YES
|    division  index into modelSettings[]
|    chain     chain index
|
| NOTE(review): the return statement and the closing #endif are not
| among the lines shown here.
7165 -----------------------------------------------------------------*/
7166 int RemoveNodeScalers_SSE (TreeNode *p, int division, int chain)
7169 __m128 *scP_SSE, *lnScaler_SSE;
7172 m = &modelSettings[division];
7173 assert (m->scalersSet[chain][p->index] == YES);
/* scalers stored for this node, viewed as packed floats */
7176 scP_SSE = (__m128*)(m->scalers[m->nodeScalerIndex[chain][p->index]]);
7178 /* find site scalers */
7179 lnScaler_SSE = (__m128*)(m->scalers[m->siteScalerIndex[chain]]);
7181 /* remove scalers */
7182 for (c=0; c<m->numSSEChars; c++)
7184 lnScaler_SSE[c] = _mm_sub_ps(lnScaler_SSE[c], scP_SSE[c]);
/* SetBinaryQMatrix: fill the 2 x 2 rate matrix a for a two-state model.
     a          - output matrix; all four entries are written
     whichChain - chain index used to fetch the state frequencies
     division   - index into modelSettings[]; the assert requires
                  m->numModelStates == 2
   With bs[] the two state frequencies and scaler = 1 / (2*bs[0]*bs[1]),
   row 0 is (-bs[1], bs[1]) * scaler and row 1 is (bs[0], -bs[0]) * scaler,
   so bs[0]*a[0][1] + bs[1]*a[1][0] equals 1.
   NOTE(review): the return statement is not among the lines shown here. */
7193 int SetBinaryQMatrix (MrBFlt **a, int whichChain, int division)
7198 /* set up pointers to the appropriate model information */
7199 m = &modelSettings[division];
7200 assert (m->numModelStates == 2);
7202 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
7203 scaler = 1.0 / (2*bs[0]*bs[1]);
7204 a[0][0]= -bs[1]*scaler;
7205 a[0][1]= bs[1]*scaler;
7206 a[1][0]= bs[0]*scaler;
7207 a[1][1]= -bs[0]*scaler;
/* SetNucQMatrix: build the instantaneous rate matrix a for nucleotide-based
   models. The visible branches cover the 4 X 4 model, the 8 X 8 covarion
   model (n == 8), the 16 X 16 doublet model and the codon model.
     a          - output rate matrix
     n          - number of model states; the assert requires
                  n == m->numModelStates
     whichChain - chain index used to fetch parameter values
     division   - index into modelParams[] and modelSettings[]
     rateMult   - multiplies s[][] in the covarion branch; per the codon
                  comment below it carries the dN/dS ratio for codon models
     rA, rS     - both are set to 1.0 on one visible path near the end; their
                  other uses are not visible in this excerpt
   NOTE(review): the return statement is not among the lines shown here. */
7213 int SetNucQMatrix (MrBFlt **a, int n, int whichChain, int division, MrBFlt rateMult, MrBFlt *rA, MrBFlt *rS)
7215 register int i, j, k;
7216 int isTransition=0, nDiff, rtNum=0;
7217 MrBFlt scaler, mult=0.0, probOn, sum, *swr, s01, s10, s[4][4], nonsyn, *rateValues=NULL, *bs, dN, dS;
7220 # if defined BEAGLE_ENABLED
7224 /* set up pointers to the appropriate model information */
7225 mp = &modelParams[division];
7226 m = &modelSettings[division];
7227 assert (m->numModelStates == n);
7229 /* All of the models that are set up in this function require the frequencies
7230 of the nucleotides (or doublets or codons). They will also require either
7231 a transition/transversion rate ratio or the GTR rate parameters. The
7232 "rateValues" will either be
7234 rateValues[0] = transition/transversion rate (kappa)
7238 rateValues[0] = A <-> C rate
7239 rateValues[1] = A <-> G rate
7240 rateValues[2] = A <-> T rate
7241 rateValues[3] = C <-> G rate
7242 rateValues[4] = C <-> T rate
7243 rateValues[5] = G <-> T rate
7245 for nst=6 models. */
7246 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
7249 rateValues = GetParamVals(m->tRatio, whichChain, state[whichChain]);
7250 # if defined (BEAGLE_ENABLED)
7251 /* transversions assumed to have rate 1.0; */
7252 trans = rateValues[0];
7253 if (m->numModelStates == 4) /* code to satisfy Beagle */
/* NOTE(review): from here rateValues points at newly allocated storage, not
   at the parameter's own values; its release is presumably tied to the
   BEAGLE-guarded test at the end of the function (body not visible) */
7255 rateValues = (MrBFlt *) SafeCalloc (6, sizeof(MrBFlt));
7256 rateValues[0] = rateValues[2] = rateValues[3] = rateValues[5] =1.0; /* Setting transversions */
7257 rateValues[1] = rateValues[4] = trans; /* Setting transitions */
7262 else if (m->nst == 6 || m->nst == NST_MIXED)
7263 rateValues = GetParamVals(m->revMat, whichChain, state[whichChain]);
7264 # if defined (BEAGLE_ENABLED)
7265 else if (m->nst == 1 && m->numModelStates == 4) /* code to satisfy Beagle */
7267 rateValues = (MrBFlt *) SafeCalloc (6, sizeof(MrBFlt));
7269 rateValues[i] = 1.0;
7277 Here, we set the rate matrix for the GTR model (Tavare, 1986). We
7278 need not only the 6 rates for this model (rateValues), but also the
7279 base frequencies (bs). */
7281 /* set diagonal of Q matrix to 0 */
7285 /* initialize Q matrix */
7289 for (j=i+1; j<4; j++)
7291 if (i == 0 && j == 1)
7292 mult = rateValues[0];
7293 else if (i == 0 && j == 2)
7294 mult = rateValues[1];
7295 else if (i == 0 && j == 3)
7296 mult = rateValues[2];
7297 else if (i == 1 && j == 2)
7298 mult = rateValues[3];
7299 else if (i == 1 && j == 3)
7300 mult = rateValues[4];
7301 else if (i == 2 && j == 3)
7302 mult = rateValues[5];
/* each off-diagonal entry is target frequency times rate; the same value is
   subtracted from the diagonal of its row */
7303 a[i][i] -= (a[i][j] = bs[j] * mult);
7304 a[j][j] -= (a[j][i] = bs[i] * mult);
7305 scaler += bs[i] * a[i][j];
7306 scaler += bs[j] * a[j][i];
7310 /* rescale Q matrix */
7311 scaler = 1.0 / scaler;
7316 else if (n == 8) /* we have a 4 X 4 covarion model */
7318 /* 8 X 8 covarion model:
7320 Here, we set the rate matrix for the covarion model (Tuffley and
7321 Steel, 1997). We need the rate parameters of the model
7322 (contained in rateValues), the frequencies of the four nucleotides,
7323 and the switching rates to completely specify the rate matrix. We
7324 first set up the 4 X 4 submatrix that represents changes (the upper
7325 left portion of the 8 X 8 matrix). Note that if we have rate
7326 variation across sites, that we need to deal with the multiplication
7327 in the rate matrix (i.e., we cannot simply deal with rate variation
7328 by multiplying the branch length by a rate multiplier as we can
7329 with other models). Instead, we multiply the scaled rate matrix
7330 by the rate multiplier. */
7332 /* Get the switching rates. The rate of off->on is s01 and the rate
7333 of on->off is s10. The stationary probability of the switch process
7334 is prob1 = s01/(s01+s10) and prob0 = s10/(s01+s10). */
7335 swr = GetParamVals (m->switchRates, whichChain, state[whichChain]);
7338 probOn = s01 / (s01 + s10);
7340 /* set matrix a to 0 */
7345 /* set up the 4 X 4 matrix representing substitutions (s[][]; upper left) */
7351 for (j=i+1; j<4; j++)
7355 scaler += bs[i] * s[i][j] * probOn;
7356 scaler += bs[j] * s[j][i] * probOn;
7360 else if (m->nst == 2)
7365 for (j=i+1; j<4; j++)
/* state pairs 0/2 and 1/3 receive the ti/tv ratio rateValues[0] */
7367 if ((i == 0 && j == 2) || (i == 2 && j == 0) || (i == 1 && j == 3) || (i == 3 && j == 1))
7368 mult = rateValues[0];
7371 s[i][j] = bs[j] * mult;
7372 s[j][i] = bs[i] * mult;
7373 scaler += bs[i] * s[i][j] * probOn;
7374 scaler += bs[j] * s[j][i] * probOn;
7383 for (j=i+1; j<4; j++)
7385 if (i == 0 && j == 1)
7386 mult = rateValues[0];
7387 else if (i == 0 && j == 2)
7388 mult = rateValues[1];
7389 else if (i == 0 && j == 3)
7390 mult = rateValues[2];
7391 else if (i == 1 && j == 2)
7392 mult = rateValues[3];
7393 else if (i == 1 && j == 3)
7394 mult = rateValues[4];
7395 else if (i == 2 && j == 3)
7396 mult = rateValues[5];
7398 s[i][j] = bs[j] * mult;
7399 s[j][i] = bs[i] * mult;
7400 scaler += bs[i] * s[i][j] * probOn;
7401 scaler += bs[j] * s[j][i] * probOn;
7406 /* rescale off diagonal elements of s[][] matrix */
7407 scaler = 1.0 / scaler;
7417 /* now, scale s[][] by rate factor */
7423 s[i][j] *= rateMult;
7427 /* put in diagonal elements of s[][] */
/* the on->off switching rate s10 is included in the diagonal */
7436 s[i][i] = -(sum + s10);
7439 /* Now, put s[][] into top left portion of a matrix and fill in the
7440 other parts of the matrix with the appropriate switching rates. */
7459 printf ("%1.10lf ", a[i][j]);
7463 printf ("%lf ", bs[i]);
7465 printf ("s01 = %lf s10 = %lf pi1 = %lf pi0 = %lf\n", s01, s10, probOn, 1-probOn);
7470 /* 16 X 16 doublet model:
7472 We have a doublet model. The states are in the order AA, AC, AG, AT, CA, CC
7473 CG, CT, GA, GC, GG, GT, TA, TC, TG, TT. The rate matrix is straight-forward
7474 to set up. We simply multiply the rate parameter (e.g., the ti/tv rate
7475 ratio) by the doublet frequencies. */
7477 /* set diagonal of Q matrix to 0 */
7478 for (i=0; i<16; i++)
7481 if (m->nst == 1) /* F81-like doublet model */
7484 for (i=0; i<16; i++)
7486 for (j=i+1; j<16; j++)
/* true when the two doublets differ at both positions */
7488 if (((doublet[i].first & doublet[j].first) == 0) && ((doublet[i].second & doublet[j].second) == 0))
7492 a[i][i] -= (a[i][j] = bs[j] * mult);
7493 a[j][j] -= (a[j][i] = bs[i] * mult);
7494 scaler += bs[i] * a[i][j];
7495 scaler += bs[j] * a[j][i];
7499 else if (m->nst == 2) /* HKY-like doublet model */
7502 for (i=0; i<16; i++)
7504 for (j=i+1; j<16; j++)
7506 if (((doublet[i].first & doublet[j].first) == 0) && ((doublet[i].second & doublet[j].second) == 0))
7510 if ((doublet[i].first & doublet[j].first) == 0)
/* sums 5 and 10 select the pairs that receive the ti/tv ratio; this is
   consistent with a one-bit-per-nucleotide code A=1, C=2, G=4, T=8
   (A+G = 5, C+T = 10) -- TODO confirm against the doublet[] table */
7512 if ((doublet[i].first + doublet[j].first) == 5 || (doublet[i].first + doublet[j].first) == 10)
7513 mult = rateValues[0];
7519 if ((doublet[i].second + doublet[j].second) == 5 || (doublet[i].second + doublet[j].second) == 10)
7520 mult = rateValues[0];
7525 a[i][i] -= (a[i][j] = bs[j] * mult);
7526 a[j][j] -= (a[j][i] = bs[i] * mult);
7527 scaler += bs[i] * a[i][j];
7528 scaler += bs[j] * a[j][i];
7532 else /* GTR-like doublet model */
7535 for (i=0; i<16; i++)
7537 for (j=i+1; j<16; j++)
7539 if (((doublet[i].first & doublet[j].first) == 0) && ((doublet[i].second & doublet[j].second) == 0))
7543 if ((doublet[i].first & doublet[j].first) == 0)
/* the sum of the two differing codes picks the rate: 3, 5, 9, 6, 10 map to
   rateValues[0..4] and any other sum to rateValues[5] */
7545 if ((doublet[i].first + doublet[j].first) == 3)
7546 mult = rateValues[0];
7547 else if ((doublet[i].first + doublet[j].first) == 5)
7548 mult = rateValues[1];
7549 else if ((doublet[i].first + doublet[j].first) == 9)
7550 mult = rateValues[2];
7551 else if ((doublet[i].first + doublet[j].first) == 6)
7552 mult = rateValues[3];
7553 else if ((doublet[i].first + doublet[j].first) == 10)
7554 mult = rateValues[4];
7556 mult = rateValues[5];
7560 if ((doublet[i].second + doublet[j].second) == 3)
7561 mult = rateValues[0];
7562 else if ((doublet[i].second + doublet[j].second) == 5)
7563 mult = rateValues[1];
7564 else if ((doublet[i].second + doublet[j].second) == 9)
7565 mult = rateValues[2];
7566 else if ((doublet[i].second + doublet[j].second) == 6)
7567 mult = rateValues[3];
7568 else if ((doublet[i].second + doublet[j].second) == 10)
7569 mult = rateValues[4];
7571 mult = rateValues[5];
7574 a[i][i] -= (a[i][j] = bs[j] * mult);
7575 a[j][j] -= (a[j][i] = bs[i] * mult);
7576 scaler += bs[i] * a[i][j];
7577 scaler += bs[j] * a[j][i];
7583 /* rescale Q matrix */
7584 scaler = 1.0 / scaler;
7585 for (i=0; i<16; i++)
7586 for (j=0; j<16; j++)
7591 /* 64(ish) X 64(ish) codon model:
7593 Here, we set the rate matrix for the codon model (see Goldman and
7594 Yang, 1994). Note that we can specify any general type of codon
7595 model, with these constraints:
7597 a[i][j] = 0 -> if i and j differ at 2 or 3 nucleotides
7598 a[i][j] = rateValues[0] * bs[j] -> if synonymous A <-> C change
7599 a[i][j] = rateValues[1] * bs[j] -> if synonymous A <-> G change
7600 a[i][j] = rateValues[2] * bs[j] -> if synonymous A <-> T change
7601 a[i][j] = rateValues[3] * bs[j] -> if synonymous C <-> G change
7602 a[i][j] = rateValues[4] * bs[j] -> if synonymous C <-> T change
7603 a[i][j] = rateValues[5] * bs[j] -> if synonymous G <-> T change
7605 a[i][j] = rateValues[0] * nonsyn * bs[j] -> if nonsynonymous A <-> C change
7606 a[i][j] = rateValues[1] * nonsyn * bs[j] -> if nonsynonymous A <-> G change
7607 a[i][j] = rateValues[2] * nonsyn * bs[j] -> if nonsynonymous A <-> T change
7608 a[i][j] = rateValues[3] * nonsyn * bs[j] -> if nonsynonymous C <-> G change
7609 a[i][j] = rateValues[4] * nonsyn * bs[j] -> if nonsynonymous C <-> T change
7610 a[i][j] = rateValues[5] * nonsyn * bs[j] -> if nonsynonymous G <-> T change
7612 Other models, such as the one used by Nielsen & Yang (1998) can be obtained
7613 from this model by restricting transitions and transversions to have the same rate.
7614 nonsyn is the nonsynonymous/synonymous rate ratio (often called the
7615 dN/dS ratio). If we are in this part of the function, then we rely on it
7616 being called with the "rateMult" parameter specifying the dN/dS ratio. Note
7617 that the size of the matrix will never be 64 X 64 as we only consider changes
7618 among coding triplets (i.e., we exclude the stop codons). */
7620 /* get the nonsynonymous/synonymous rate ratio */
7623 /* set diagonal of Q matrix to 0 */
7627 /* set dN and dS rates to zero */
7630 if (m->nst == 1) /* F81-like codon model */
7635 for (j=i+1; j<n; j++)
7640 if (mp->codonNucs[i][k] != mp->codonNucs[j][k])
7649 if (mp->codonAAs[i] == mp->codonAAs[j])
7655 a[i][i] -= (a[i][j] = bs[j] * mult);
7656 a[j][j] -= (a[j][i] = bs[i] * mult);
/* the flux of this pair goes to dS when both codons map to the same amino
   acid and to dN otherwise */
7657 if (mp->codonAAs[i] == mp->codonAAs[j])
7658 dS += (bs[i] * a[i][j] + bs[j] * a[j][i]);
7660 dN += (bs[i] * a[i][j] + bs[j] * a[j][i]);
7661 scaler += bs[i] * a[i][j];
7662 scaler += bs[j] * a[j][i];
7666 else if (m->nst == 2) /* HKY-like codon model */
7671 for (j=i+1; j<n; j++)
7676 if (mp->codonNucs[i][k] != mp->codonNucs[j][k])
/* nucleotide codes 0<->2 and 1<->3 are the pairs tested here; the result is
   presumably recorded in isTransition (assignment not visible) */
7679 if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 2) || (mp->codonNucs[i][k] == 2 && mp->codonNucs[j][k] == 0) ||
7680 (mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 3) || (mp->codonNucs[i][k] == 3 && mp->codonNucs[j][k] == 1))
7692 if (mp->codonAAs[i] == mp->codonAAs[j])
7696 if (isTransition == YES)
7697 mult *= rateValues[0];
7700 a[i][i] -= (a[i][j] = bs[j] * mult);
7701 a[j][j] -= (a[j][i] = bs[i] * mult);
7702 if (mp->codonAAs[i] == mp->codonAAs[j])
7703 dS += (bs[i] * a[i][j] + bs[j] * a[j][i]);
7705 dN += (bs[i] * a[i][j] + bs[j] * a[j][i]);
7706 scaler += bs[i] * a[i][j];
7707 scaler += bs[j] * a[j][i];
7711 else /* GTR-like codon model */
7716 for (j=i+1; j<n; j++)
7721 if (mp->codonNucs[i][k] != mp->codonNucs[j][k])
7724 if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 1) || (mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 0))
7726 else if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 2) || (mp->codonNucs[i][k] == 2 && mp->codonNucs[j][k] == 0))
7728 else if ((mp->codonNucs[i][k] == 0 && mp->codonNucs[j][k] == 3) || (mp->codonNucs[i][k] == 3 && mp->codonNucs[j][k] == 0))
7730 else if ((mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 2) || (mp->codonNucs[i][k] == 2 && mp->codonNucs[j][k] == 1))
7732 else if ((mp->codonNucs[i][k] == 1 && mp->codonNucs[j][k] == 3) || (mp->codonNucs[i][k] == 3 && mp->codonNucs[j][k] == 1))
7744 if (mp->codonAAs[i] == mp->codonAAs[j])
7749 mult *= rateValues[0];
7750 else if (rtNum == 1)
7751 mult *= rateValues[1];
7752 else if (rtNum == 2)
7753 mult *= rateValues[2];
7754 else if (rtNum == 3)
7755 mult *= rateValues[3];
7756 else if (rtNum == 4)
7757 mult *= rateValues[4];
7759 mult *= rateValues[5];
7762 a[i][i] -= (a[i][j] = bs[j] * mult);
7763 a[j][j] -= (a[j][i] = bs[i] * mult);
7764 if (mp->codonAAs[i] == mp->codonAAs[j])
7765 dS += (bs[i] * a[i][j] + bs[j] * a[j][i]);
7767 dN += (bs[i] * a[i][j] + bs[j] * a[j][i]);
7768 scaler += bs[i] * a[i][j];
7769 scaler += bs[j] * a[j][i];
7774 /* rescale Q matrix */
7775 if (m->nucModelId == NUCMODEL_CODON && m->numOmegaCats > 1)
7777 /* If we have a positive selection model with multiple categories, then
7778 we do not rescale the rate matrix until we have finished generating
7779 all of the necessary rate matrices. The rescaling occurrs in
7786 scaler = 1.0 / scaler;
7790 (*rA) = (*rS) = 1.0;
7798 printf ("%0.5lf ", a[i][j]);
7803 # if defined (BEAGLE_ENABLED)
7804 if ((m->nst == 1 || m->nst == 2) && m->numModelStates == 4)
7812 int SetProteinQMatrix (MrBFlt **a, int n, int whichChain, int division, MrBFlt rateMult)
7814 register int i, j, k;
7816 MrBFlt scaler, probOn, sum, *swr, s01, s10, *bs, *rt;
7819 /* set up pointers to the appropriate model information */
7820 m = &modelSettings[division];
7822 /* get amino acid model ID
7837 if (m->aaModelId >= 0)
7838 aaModelID = m->aaModelId;
7840 aaModelID = (int)*GetParamVals(m->aaModel, whichChain, state[whichChain]);
7842 /* Make certain that we have either 20 or 40 states. Anything
7843 else means we have a real problem. */
7844 if (n != 20 && n != 40)
7846 MrBayesPrint ("%s ERROR: There should be 20 or 40 states for the aa model\n");
7852 /* We have a run-of-the-mill amino acid model (i.e., 20 X 20). */
7853 if (aaModelID == AAMODEL_POISSON)
7855 scaler = 1.0 / 19.0;
7856 for (i=0; i<20; i++)
7858 for (j=i+1; j<20; j++)
7864 for (i=0; i<20; i++)
7867 else if (aaModelID == AAMODEL_EQ)
7869 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
7870 for (i=0; i<20; i++)
7871 for (j=0; j<20; j++)
7874 for (i=0; i<20; i++)
7876 for (j=i+1; j<20; j++)
7878 a[i][i] -= (a[i][j] = bs[j]);
7879 a[j][j] -= (a[j][i] = bs[i]);
7880 scaler += bs[i] * a[i][j];
7881 scaler += bs[j] * a[j][i];
7884 scaler = 1.0 / scaler;
7885 for (i=0; i<20; i++)
7886 for (j=0; j<20; j++)
7889 else if (aaModelID == AAMODEL_GTR)
7891 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
7892 rt = GetParamVals (m->revMat, whichChain, state[whichChain]);
7893 for (i=0; i<20; i++)
7894 for (j=0; j<20; j++)
7897 for (i=k=0; i<20; i++)
7899 for (j=i+1; j<20; j++)
7901 a[i][i] -= (a[i][j] = bs[j] * rt[k]);
7902 a[j][j] -= (a[j][i] = bs[i] * rt[k]);
7906 for (i=0; i<20; i++)
7907 scaler += -(bs[i] * a[i][i]);
7908 for (i=0; i<20; i++)
7909 for (j=0; j<20; j++)
7912 else if (aaModelID == AAMODEL_JONES)
7914 for (i=0; i<20; i++)
7915 for (j=0; j<20; j++)
7916 a[i][j] = aaJones[i][j];
7918 else if (aaModelID == AAMODEL_DAY)
7920 for (i=0; i<20; i++)
7921 for (j=0; j<20; j++)
7922 a[i][j] = aaDayhoff[i][j];
7924 else if (aaModelID == AAMODEL_MTREV)
7926 for (i=0; i<20; i++)
7927 for (j=0; j<20; j++)
7928 a[i][j] = aaMtrev24[i][j];
7930 else if (aaModelID == AAMODEL_MTMAM)
7932 for (i=0; i<20; i++)
7933 for (j=0; j<20; j++)
7934 a[i][j] = aaMtmam[i][j];
7936 else if (aaModelID == AAMODEL_RTREV)
7938 for (i=0; i<20; i++)
7939 for (j=0; j<20; j++)
7940 a[i][j] = aartREV[i][j];
7942 else if (aaModelID == AAMODEL_WAG)
7944 for (i=0; i<20; i++)
7945 for (j=0; j<20; j++)
7946 a[i][j] = aaWAG[i][j];
7948 else if (aaModelID == AAMODEL_CPREV)
7950 for (i=0; i<20; i++)
7951 for (j=0; j<20; j++)
7952 a[i][j] = aacpREV[i][j];
7954 else if (aaModelID == AAMODEL_VT)
7956 for (i=0; i<20; i++)
7957 for (j=0; j<20; j++)
7958 a[i][j] = aaVt[i][j];
7960 else if (aaModelID == AAMODEL_BLOSUM)
7962 for (i=0; i<20; i++)
7963 for (j=0; j<20; j++)
7964 a[i][j] = aaBlosum[i][j];
7966 else if (aaModelID == AAMODEL_LG)
7968 for (i=0; i<20; i++)
7969 for (j=0; j<20; j++)
7970 a[i][j] = aaLG[i][j];
7974 MrBayesPrint ("%s ERROR: Don't understand which amino acid model is needed\n");
7978 for (i=0; i<20; i++)
7980 for (j=0; j<20; j++)
7981 printf ("%1.3lf ", a[i][j]);
7988 /* 40 X 40 covarion model:
7990 We have a covarion model, and must set up the other quadrants. Note that if
7991 we are at this point in the code, that we have already set up the upper left
7992 portion of the 40 X 40 rate matrix. Note that if we have rate
7993 variation across sites, that we need to deal with the multiplication
7994 in the rate matrix (i.e., we cannot simply deal with rate variation
7995 by multiplying the branch length by a rate multiplier as we can
7996 with other models). Instead, we multiply the scaled rate matrix
7997 by the rate multiplier. */
7999 /* Get the switching rates. The rate of off->on is s01 and the rate
8000 of on->off is s10. The stationary probability of the switch process
8001 is prob1 = s01/(s01+s10) and prob0 = s10/(s01+s10). */
8002 swr = GetParamVals (m->switchRates, whichChain, state[whichChain]);
8005 probOn = s01 / (s01 + s10);
8007 /* set matrix a[][] to 0 */
8008 for (i=0; i<40; i++)
8009 for (j=0; j<40; j++)
8012 /* fill in upper-left sub matrix (where substitutions occur */
8013 if (aaModelID == AAMODEL_POISSON)
8016 for (i=0; i<20; i++)
8018 for (j=i+1; j<20; j++)
8022 scaler += 0.05 * a[i][j] * probOn;
8023 scaler += 0.05 * a[j][i] * probOn;
8027 else if (aaModelID == AAMODEL_EQ)
8029 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
8031 for (i=0; i<20; i++)
8033 for (j=i+1; j<20; j++)
8037 scaler += bs[i] * a[i][j] * probOn;
8038 scaler += bs[j] * a[j][i] * probOn;
8042 else if (aaModelID == AAMODEL_GTR)
8044 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
8045 rt = GetParamVals (m->revMat, whichChain, state[whichChain]);
8046 for (i=0; i<20; i++)
8047 for (j=0; j<20; j++)
8050 for (i=k=0; i<20; i++)
8052 for (j=i+1; j<20; j++)
8054 a[i][i] -= (a[i][j] = bs[j] * rt[k]);
8055 a[j][j] -= (a[j][i] = bs[i] * rt[k]);
8059 for (i=0; i<20; i++)
8060 scaler += -(bs[i] * a[i][i]);
8061 for (i=0; i<20; i++)
8062 for (j=0; j<20; j++)
8064 for (i=0; i<20; i++)
8066 for (j=i+1; j<20; j++)
8070 scaler += bs[i] * a[i][j] * probOn;
8071 scaler += bs[j] * a[j][i] * probOn;
8075 else if (aaModelID == AAMODEL_JONES)
8078 for (i=0; i<20; i++)
8080 for (j=i+1; j<20; j++)
8082 a[i][j] = aaJones[i][j];
8083 a[j][i] = aaJones[j][i];
8084 scaler += jonesPi[i] * a[i][j] * probOn;
8085 scaler += jonesPi[j] * a[j][i] * probOn;
8089 else if (aaModelID == AAMODEL_DAY)
8092 for (i=0; i<20; i++)
8094 for (j=i+1; j<20; j++)
8096 a[i][j] = aaDayhoff[i][j];
8097 a[j][i] = aaDayhoff[j][i];
8098 scaler += dayhoffPi[i] * a[i][j] * probOn;
8099 scaler += dayhoffPi[j] * a[j][i] * probOn;
8103 else if (aaModelID == AAMODEL_MTREV)
8106 for (i=0; i<20; i++)
8108 for (j=i+1; j<20; j++)
8110 a[i][j] = aaMtrev24[i][j];
8111 a[j][i] = aaMtrev24[j][i];
8112 scaler += mtrev24Pi[i] * a[i][j] * probOn;
8113 scaler += mtrev24Pi[j] * a[j][i] * probOn;
8117 else if (aaModelID == AAMODEL_MTMAM)
8120 for (i=0; i<20; i++)
8122 for (j=i+1; j<20; j++)
8124 a[i][j] = aaMtmam[i][j];
8125 a[j][i] = aaMtmam[j][i];
8126 scaler += mtmamPi[i] * a[i][j] * probOn;
8127 scaler += mtmamPi[j] * a[j][i] * probOn;
8131 else if (aaModelID == AAMODEL_RTREV)
8134 for (i=0; i<20; i++)
8136 for (j=i+1; j<20; j++)
8138 a[i][j] = aartREV[i][j];
8139 a[j][i] = aartREV[j][i];
8140 scaler += rtrevPi[i] * a[i][j] * probOn;
8141 scaler += rtrevPi[j] * a[j][i] * probOn;
8145 else if (aaModelID == AAMODEL_WAG)
8148 for (i=0; i<20; i++)
8150 for (j=i+1; j<20; j++)
8152 a[i][j] = aaWAG[i][j];
8153 a[j][i] = aaWAG[j][i];
8154 scaler += wagPi[i] * a[i][j] * probOn;
8155 scaler += wagPi[j] * a[j][i] * probOn;
8159 else if (aaModelID == AAMODEL_CPREV)
8162 for (i=0; i<20; i++)
8164 for (j=i+1; j<20; j++)
8166 a[i][j] = aacpREV[i][j];
8167 a[j][i] = aacpREV[j][i];
8168 scaler += cprevPi[i] * a[i][j] * probOn;
8169 scaler += cprevPi[j] * a[j][i] * probOn;
8173 else if (aaModelID == AAMODEL_VT)
8176 for (i=0; i<20; i++)
8178 for (j=i+1; j<20; j++)
8180 a[i][j] = aaVt[i][j];
8181 a[j][i] = aaVt[j][i];
8182 scaler += vtPi[i] * a[i][j] * probOn;
8183 scaler += vtPi[j] * a[j][i] * probOn;
8187 else if (aaModelID == AAMODEL_BLOSUM)
8190 for (i=0; i<20; i++)
8192 for (j=i+1; j<20; j++)
8194 a[i][j] = aaBlosum[i][j];
8195 a[j][i] = aaBlosum[j][i];
8196 scaler += blosPi[i] * a[i][j] * probOn;
8197 scaler += blosPi[j] * a[j][i] * probOn;
8201 else if (aaModelID == AAMODEL_LG)
8204 for (i=0; i<20; i++)
8206 for (j=i+1; j<20; j++)
8208 a[i][j] = aaLG[i][j];
8209 a[j][i] = aaLG[j][i];
8210 scaler += lgPi[i] * a[i][j] * probOn;
8211 scaler += lgPi[j] * a[j][i] * probOn;
8217 MrBayesPrint ("%s ERROR: Don't understand which amino acid model is needed\n");
8221 /* rescale off diagonal elements of Q matrix */
8222 scaler = 1.0 / scaler;
8223 for (i=0; i<20; i++)
8225 for (j=0; j<20; j++)
8232 /* now, scale by rate factor */
8233 for (i=0; i<20; i++)
8235 for (j=0; j<20; j++)
8238 a[i][j] *= rateMult;
8242 /* put in diagonal elements */
8243 for (i=0; i<20; i++)
8246 for (j=0; j<20; j++)
8250 a[i][i] = -(sum + s10);
8254 /* fill in the other three submatrices */
8255 for (i=20; i<40; i++)
8257 for (i=0; i<20; i++)
/* SetStdQMatrix: build the rate matrix a for a standard character.
     a       - output matrix; the nStates x nStates part is written
     nStates - number of states of the character
     bs      - stationary state frequencies, supplied by the caller
     cType   - character type (ordered or unordered, per the comment below);
               the test on it is not among the lines shown here
   NOTE(review): the return statement is not among the lines shown here. */
8269 int SetStdQMatrix (MrBFlt **a, int nStates, MrBFlt *bs, int cType)
8274 /* This function sets up ordered or unordered models for standard characters
8275 with unequal stationary state frequencies. It requires the stationary
8276 frequencies of the states (passed when calling the function). It also
8277 needs to know the number of states and the type (ordered or unordered)
8278 of the character. */
8280 /* set Q matrix to 0 */
8281 for (i=0; i<nStates; i++)
8282 for (j=0; j<nStates; j++)
8285 /* initialize Q matrix */
8289 /* unordered characters */
8290 for (i=0; i<nStates; i++)
8292 for (j=0; j<nStates; j++)
/* off-diagonal entry is the target state's frequency; the same amount is
   subtracted from the diagonal of row i */
8296 a[i][i] -= (a[i][j] = bs[j]);
8297 scaler += bs[i] * a[i][j];
8304 /* ordered characters */
8305 for (i=0; i<nStates; i++)
8307 for (j=0; j<nStates; j++)
/* only states whose indices differ by exactly one exchange directly */
8309 if (abs(i - j) == 1)
8311 a[i][i] -= (a[i][j] = bs[j]);
8312 scaler += bs[i] * a[i][j];
8318 /* rescale Q matrix */
8319 for (i=0; i<nStates; i++)
8320 for (j=0; j<nStates; j++)
8323 # if defined DEBUG_SETSTDQMATRIX
8324 for (i=0; i<nStates; i++)
8326 for (j=0; j<nStates; j++)
8327 printf ("%0.5lf ", a[i][j]);
/* TiProbs_Fels: fill the transition probability array of node p for a
   four-state model using closed-form expressions (no eigen decomposition is
   used in the visible lines).
     p        - node whose transition probabilities are written
     division - index into modelSettings[]
     chain    - chain index; selects the tiProbs buffer and parameter values
   For every gamma category k the effective time is
   t = length * baseRate * catRate[k]. The visible branches write the
   stationary frequencies when t > TIME_MAX and otherwise evaluate the
   formulas at the end of the function; an identity-matrix branch is also
   present (its test is not visible).
   NOTE(review): the return statement is not among the lines shown here. */
8336 int TiProbs_Fels (TreeNode *p, int division, int chain)
8339 MrBFlt t, u, x, z, beta, bigPi_j[4], pij, bigPij,
8340 *catRate, baseRate, theRate, *pis, length;
8344 m = &modelSettings[division];
8346 /* find transition probabilities */
8347 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
8349 /* get base frequencies */
8350 pis = GetParamSubVals (m->stateFreq, chain, state[chain]);
8352 /* get rate multipliers (for gamma & partition specific rates) */
8354 baseRate = GetRate (division, chain);
8355 /* compensate for invariable sites if appropriate */
8356 if (m->pInvar != NULL)
8357 baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
8358 /* get category rates */
8359 if (m->shape == NULL)
8362 catRate = GetParamSubVals (m->shape, chain, state[chain]);
8365 beta = (0.5 / ((pis[0] + pis[2])*(pis[1] + pis[3]) + ((pis[0]*pis[2]) + (pis[1]*pis[3]))));
/* states 0 and 2 share one pooled frequency and states 1 and 3 the other
   (presumably the purine and pyrimidine classes -- TODO confirm state order) */
8367 bigPi_j[0] = (pis[0] + pis[2]);
8368 bigPi_j[1] = (pis[1] + pis[3]);
8369 bigPi_j[2] = (pis[0] + pis[2]);
8370 bigPi_j[3] = (pis[1] + pis[3]);
/* a per-branch value is read from whichever of these parameters is set;
   the fallback used when none is set is not visible here */
8373 if (m->cppEvents != NULL)
8375 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
8377 else if (m->tk02BranchRates != NULL)
8379 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
8381 else if (m->igrBranchRates != NULL)
8383 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
8385 else if (m->mixedBrchRates != NULL)
8387 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
8392 /* numerical errors will ensue if we allow very large or very small branch lengths,
8393 which might occur in relaxed clock models */
8395 /* fill in values */
/* index runs continuously over all categories, so tiP holds one block of
   entries per gamma category */
8396 for (k=index=0; k<m->numGammaCats; k++)
8398 t = length * baseRate * catRate[k];
8402 /* Fill in identity matrix */
8414 else if (t > TIME_MAX)
8416 /* Fill in stationary matrix */
8419 tiP[index++] = (CLFlt) pis[j];
8423 /* calculate probabilities */
8428 bigPij = bigPi_j[j];
8430 u = 1.0/bigPij - 1.0;
8432 z = (bigPij - pij) / bigPij;
8435 tiP[index++] = (CLFlt) (pij + pij * u * x + z * x);
8437 tiP[index++] = (CLFlt) (pij + pij * u * x - (pij/bigPij) * x);
8447 /*----------------------------------------------------------------
8449 | TiProbs_Gen: Calculates transition probabilities for general
8450 | models with or without rate variation. This function does
8453 | 1. codon models with omega variation or
8454 | 2. covarion models with rate variation
8456 | In either of these cases, TiProbs_GenCov is used
8458 -----------------------------------------------------------------*/
/* Parameters:
     p        - node; p->index selects both the tiProbs buffer and the branch-rate entry
     division - index into modelSettings
     chain    - chain index, also used to look up state[chain]
   The two numbered cases in the banner above are the ones delegated to TiProbs_GenCov.
   Returns an int status code (presumably NO_ERROR on success -- TODO confirm). */
8459 int TiProbs_Gen (TreeNode *p, int division, int chain)
8461 register int i, j, k, n, s, index;
/* NOTE(review): EigValexp has room for 64 values and is indexed by s below;
   confirm that m->numModelStates never exceeds 64 for models routed here */
8462 MrBFlt t, *catRate, baseRate, *eigenValues, *cijk, *bs,
8463 EigValexp[64], sum, *ptr, theRate, correctionFactor,
8468 m = &modelSettings[division];
8469 n = m->numModelStates;
8471 /* find the correction factor to make branch lengths
8472 in terms of expected number of substitutions per character */
/* factor is 1.0 unless the data are DNA/RNA with a doublet (2.0) or codon (3.0) model */
8473 correctionFactor = 1.0;
8474 if (m->dataType == DNA || m->dataType == RNA)
8476 if (m->nucModelId == NUCMODEL_DOUBLET)
8477 correctionFactor = 2.0;
8478 else if (m->nucModelId == NUCMODEL_CODON)
8479 correctionFactor = 3.0;
8482 /* find transition probabilities */
8483 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
8485 /* get rate multipliers (for gamma & partition specific rates) */
8487 baseRate = GetRate (division, chain);
8489 /* compensate for invariable sites if appropriate */
8490 if (m->pInvar != NULL)
8491 baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
8493 /* get category rates */
8494 if (m->shape == NULL)
8497 catRate = GetParamSubVals (m->shape, chain, state[chain]);
8499 /* get eigenvalues and cijk pointers */
/* the cijk values start 2*n entries after the first eigenvalue in the same buffer */
8500 eigenValues = m->cijks[m->cijkIndex[chain]];
8501 cijk = eigenValues + (2 * n);
/* take length from the first non-NULL branch-rate parameter (cppEvents, tk02BranchRates,
   igrBranchRates, mixedBrchRates), reading its sub-value at position p->index */
8504 if (m->cppEvents != NULL)
8506 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
8508 else if (m->tk02BranchRates != NULL)
8510 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
8512 else if (m->igrBranchRates != NULL)
8514 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
8516 else if (m->mixedBrchRates != NULL)
8518 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
8523 /* fill in values */
8524 for (k=index=0; k<m->numGammaCats; k++)
/* t: length scaled by base rate, category rate and the per-character correction factor */
8526 t = length * baseRate * catRate[k] * correctionFactor;
8530 /* Fill in identity matrix */
8542 else if (t > TIME_MAX)
8545 bs = GetParamSubVals(m->stateFreq, chain, state[chain]);
8546 /* Fill in stationary matrix */
8549 tiP[index++] = (CLFlt) bs[j];
8553 /* We actually need to do some work... */
/* cache exp(eigenvalue * t) per eigenvalue, then accumulate cijk-weighted sums through ptr */
8555 EigValexp[s] = exp(eigenValues[s] * t);
8564 sum += (*ptr++) * EigValexp[s];
/* negative sums are stored as 0.0 */
8565 tiP[index++] = (CLFlt) ((sum < 0.0) ? 0.0 : sum);
/* diagnostic output: prints t with the node index, then the tiP entries */
8572 printf ("v = %lf (%d)\n", t, p->index);
8573 for (i=index=0; i<n; i++)
8576 printf ("%1.4lf ", tiP[index++]);
8586 /*----------------------------------------------------------------
8588 | TiProbs_GenCov: Calculates transition probabilities for codon
8589 | models with omega variation or covarion models with
8592 -----------------------------------------------------------------*/
/* Parameters:
     p        - node; p->index selects both the tiProbs buffer and the branch-rate entry
     division - index into modelSettings
     chain    - chain index, also used to look up state[chain]
   One set of entries is written per cijk part (m->nCijkParts), each part using its own
   eigenvalues and cijk values.
   Returns an int status code (presumably NO_ERROR on success -- TODO confirm). */
8593 int TiProbs_GenCov (TreeNode *p, int division, int chain)
8595 register int i, j, k, n, s, index;
8596 int sizeOfSingleCijk;
/* NOTE(review): EigValexp has room for 64 values and is indexed by s below;
   confirm that m->numModelStates never exceeds 64 for models routed here */
8597 MrBFlt t, *eigenValues, *cijk, EigValexp[64], sum, *ptr, correctionFactor,
8602 m = &modelSettings[division];
8603 n = m->numModelStates;
8605 /* find the correction factor to make branch lengths
8606 in terms of expected number of substitutions per character */
/* factor is 1.0 unless the data are DNA/RNA with a doublet (2.0) or codon (3.0) model */
8607 correctionFactor = 1.0;
8608 if (m->dataType == DNA || m->dataType == RNA)
8610 if (m->nucModelId == NUCMODEL_DOUBLET)
8611 correctionFactor = 2.0;
8612 else if (m->nucModelId == NUCMODEL_CODON)
8613 correctionFactor = 3.0;
8616 /* find transition probabilities */
8617 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
8619 /* get eigenvalues and cijk pointers */
/* the cijk values start 2*n entries after the first eigenvalue in the same buffer */
8620 eigenValues = m->cijks[m->cijkIndex[chain]];
8621 cijk = eigenValues + (2 * n);
8623 /* get offset size (we need to move the pointers to the appropriate
8624 cijk information for these models) */
8625 sizeOfSingleCijk = m->cijkLength / m->nCijkParts;
/* take length from the first non-NULL branch-rate parameter (cppEvents, tk02BranchRates,
   igrBranchRates, mixedBrchRates), reading its sub-value at position p->index */
8628 if (m->cppEvents != NULL)
8630 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
8632 else if (m->tk02BranchRates != NULL)
8634 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
8636 else if (m->igrBranchRates != NULL)
8638 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
8640 else if (m->mixedBrchRates != NULL)
8642 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
8647 /* numerical errors will ensue if we allow very large or very small branch lengths,
8648 which might occur in relaxed clock models */
8650 /* fill in values */
8651 for (k=index=0; k<m->nCijkParts; k++)
/* unlike TiProbs_Gen, no base rate or category rate enters t here; the per-part
   variation comes from the eigenvalues/cijk values selected for part k */
8653 t = length * correctionFactor;
8657 /* Fill in identity matrix */
8669 else if (t > TIME_MAX)
8672 bs = GetParamSubVals(m->stateFreq, chain, state[chain]);
8673 /* Fill in stationary matrix */
8676 tiP[index++] = (CLFlt) bs[j];
8680 /* We actually need to do some work... */
/* cache exp(eigenvalue * t) per eigenvalue, then accumulate cijk-weighted sums through ptr */
8682 EigValexp[s] = exp(eigenValues[s] * t);
8691 sum += (*ptr++) * EigValexp[s];
/* negative sums are stored as 0.0 */
8692 tiP[index++] = (CLFlt) ((sum < 0.0) ? 0.0 : sum);
8696 /* increment pointers by m->cijkLength */
/* (the step applied below is sizeOfSingleCijk, i.e. m->cijkLength / m->nCijkParts,
   and it is skipped after the last part) */
8697 if (k+1 < m->nCijkParts)
8699 /* shift pointers */
8700 eigenValues += sizeOfSingleCijk;
8701 cijk += sizeOfSingleCijk;
/* diagnostic output: prints the tiP entries */
8707 for (i=index=0; i<n; i++)
8710 printf ("%1.4lf ", tiP[index++]);
8719 /*-----------------------------------------------------------------
8721 | TiProbs_Hky: update transition probabilities for 4by4
8722 | nucleotide model with nst == 2 (K80/HKY85)
8723 | with or without rate variation
8725 ------------------------------------------------------------------*/
/* Parameters:
     p        - node; p->index selects both the tiProbs buffer and the branch-rate entry
     division - index into modelSettings
     chain    - chain index, also used to look up state[chain]
   Returns an int status code (presumably NO_ERROR on success -- TODO confirm). */
8726 int TiProbs_Hky (TreeNode *p, int division, int chain)
8729 MrBFlt t, kap, u, w, x, y, z, beta, bigPi_j[4], pij, bigPij, *pis,
8730 *catRate, baseRate, theRate, length;
8734 m = &modelSettings[division];
8736 /* find transition probabilities */
8737 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
/* kap: current value of the m->tRatio parameter for this chain */
8740 kap = *GetParamVals (m->tRatio, chain, state[chain]);
8742 /* get base frequencies */
8743 pis = GetParamSubVals (m->stateFreq, chain, state[chain]);
8745 /* get rate multipliers (for gamma & partition specific rates) */
8747 baseRate = GetRate (division, chain);
8748 /* compensate for invariable sites if appropriate */
8749 if (m->pInvar != NULL)
8750 baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
8751 /* get category rates */
8752 if (m->shape == NULL)
8755 catRate = GetParamSubVals (m->shape, chain, state[chain]);
/* beta: scaler built from the stationary frequencies, with the within-pair products
   (pis[0]*pis[2], pis[1]*pis[3]) weighted by kap */
8758 beta = 0.5 / ((pis[0] + pis[2])*(pis[1] + pis[3]) + kap*((pis[0]*pis[2]) + (pis[1]*pis[3])));
/* bigPi_j[j]: combined frequency of the state pair ({0,2} or {1,3}) that contains state j */
8760 bigPi_j[0] = pis[0] + pis[2];
8761 bigPi_j[1] = pis[1] + pis[3];
8762 bigPi_j[2] = pis[0] + pis[2];
8763 bigPi_j[3] = pis[1] + pis[3];
/* take length from the first non-NULL branch-rate parameter (cppEvents, tk02BranchRates,
   igrBranchRates, mixedBrchRates), reading its sub-value at position p->index */
8766 if (m->cppEvents != NULL)
8768 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
8770 else if (m->tk02BranchRates != NULL)
8772 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
8774 else if (m->igrBranchRates != NULL)
8776 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
8778 else if (m->mixedBrchRates != NULL)
8780 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
8785 /* numerical errors will ensue if we allow very large or very small branch lengths,
8786 which might occur in relaxed clock models */
8788 /* fill in values */
8789 for (k=index=0; k<m->numGammaCats; k++)
/* t: length scaled by the base rate and by the rate of category k */
8791 t = length * baseRate * catRate[k];
8795 /* Fill in identity matrix */
/* when t exceeds TIME_MAX the entries are set directly to the frequencies pis[j] */
8807 else if (t > TIME_MAX)
8809 /* Fill in stationary matrix */
8812 tiP[index++] = (CLFlt) pis[j];
8816 /* calculate probabilities */
8821 bigPij = bigPi_j[j];
8823 u = 1.0/bigPij - 1.0;
8824 w = -beta * (1.0 + bigPij * (kap - 1.0));
8827 z = (bigPij - pij) / bigPij;
/* three closed forms: the first (presumably i == j -- TODO confirm), then changes within
   a pair (0<->2, 1<->3), and finally changes between the two pairs */
8830 tiP[index++] = (CLFlt) (pij + pij * u * x + z * y);
8831 else if ((i == 0 && j == 2) || (i == 2 && j == 0) || (i == 1 && j == 3) || (i == 3 && j == 1))
8832 tiP[index++] = (CLFlt) (pij + pij * u * x - (pij/bigPij) * y);
8834 tiP[index++] = (CLFlt) (pij * (1.0 - x));
8844 /*-----------------------------------------------------------------
8846 | TiProbs_JukesCantor: update transition probabilities for 4by4
8847 | nucleotide model with nst == 1 (Jukes-Cantor)
8848 | with or without rate variation
8850 ------------------------------------------------------------------*/
/* Parameters:
     p        - node; p->index selects both the tiProbs buffer and the branch-rate entry
     division - index into modelSettings
     chain    - chain index, also used to look up state[chain]
   Returns an int status code (presumably NO_ERROR on success -- TODO confirm). */
8851 int TiProbs_JukesCantor (TreeNode *p, int division, int chain)
8853 /* calculate Jukes Cantor transition probabilities */
8856 MrBFlt t, *catRate, baseRate, length;
8857 CLFlt pNoChange, pChange, *tiP;
8860 m = &modelSettings[division];
8862 /* find transition probabilities */
8863 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
/* without a shape parameter, catRate points at the single value baseRate, so
   catRate[0] is the only valid entry in that case */
8866 if (m->shape == NULL)
8867 catRate = &baseRate;
8869 catRate = GetParamSubVals (m->shape, chain, state[chain]);
/* take length from the first non-NULL branch-rate parameter (cppEvents, tk02BranchRates,
   igrBranchRates, mixedBrchRates), reading its sub-value at position p->index */
8872 if (m->cppEvents != NULL)
8874 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
8876 else if (m->tk02BranchRates != NULL)
8878 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
8880 else if (m->igrBranchRates != NULL)
8882 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
8884 else if (m->mixedBrchRates != NULL)
8886 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
8891 /* numerical errors will ensue if we allow very large or very small branch lengths,
8892 which might occur in relaxed clock models */
8894 /* fill in values */
8895 for (k=index=0; k<m->numGammaCats; k++)
/* NOTE(review): t is length * catRate[k] only. TiProbs_Fels, TiProbs_Hky and TiProbs_Res
   use length * baseRate * catRate[k], and Fels/Hky also divide baseRate by (1 - pInvar).
   When m->shape != NULL, baseRate does not enter t at all here -- confirm this is intended. */
8897 t = length*catRate[k];
8901 /* Fill in identity matrix */
/* when t exceeds TIME_MAX every entry is set to 0.25 */
8913 else if (t > TIME_MAX)
8915 /* Fill in stationary matrix */
8918 tiP[index++] = 0.25;
8922 /* calculate probabilities */
/* closed form: both values share the decay term exp(-(4.0/3.0)*t) */
8923 pChange = (CLFlt) (0.25 - 0.25 * exp(-(4.0/3.0)*t));
8924 pNoChange = (CLFlt) (0.25 + 0.75 * exp(-(4.0/3.0)*t));
8930 tiP[index++] = pNoChange;
8932 tiP[index++] = pChange;
8942 /*-----------------------------------------------------------------
8944 | TiProbs_Res: update transition probabilities for binary
8945 | restriction site model with or without rate variation
8947 ------------------------------------------------------------------*/
/* Parameters:
     p        - node; p->index selects both the tiProbs buffer and the branch-rate entry
     division - index into modelSettings
     chain    - chain index, also used to look up state[chain]
   Four entries are written per gamma category.
   Returns an int status code (presumably NO_ERROR on success -- TODO confirm). */
8948 int TiProbs_Res (TreeNode *p, int division, int chain)
8951 MrBFlt baseRate, eV, mu, theRate, v,
8952 *bs, *catRate, length;
8956 /* find model settings for the division */
8957 m = &modelSettings[division];
8959 /* find transition probabilities */
8960 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
8963 baseRate = GetRate (division, chain);
8965 if (m->shape == NULL)
8968 catRate = GetParamSubVals (m->shape, chain, state[chain]);
8970 /* find base frequencies */
8971 bs = GetParamSubVals(m->stateFreq, chain, state[chain]);
8973 /* calculate scaling factor */
/* NOTE(review): mu divides by bs[0] * bs[1]; a zero frequency would divide by zero --
   presumably excluded upstream, TODO confirm */
8974 mu = 1.0 / (2.0 * bs[0] * bs[1]);
/* take length from the first non-NULL branch-rate parameter (cppEvents, tk02BranchRates,
   igrBranchRates, mixedBrchRates), reading its sub-value at position p->index */
8977 if (m->cppEvents != NULL)
8979 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
8981 else if (m->tk02BranchRates != NULL)
8983 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
8985 else if (m->igrBranchRates != NULL)
8987 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
8989 else if (m->mixedBrchRates != NULL)
8991 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
8996 /* numerical errors will ensue if we allow very large or very small branch lengths,
8997 which might occur in relaxed clock models */
8999 /* fill in values */
9000 for (k=index=0; k<m->numGammaCats; k++)
/* v: length scaled by the base rate and by the rate of category k */
9002 v = length * baseRate * catRate[k];
9006 /* Fill in identity matrix */
/* written as sums/differences of bs: the diagonal is bs[0] + bs[1] (1 only if the two
   frequencies sum to 1), the off-diagonal entries are exactly 0 */
9007 tiP[index++] = (CLFlt) (bs[0] + bs[1]);
9008 tiP[index++] = (CLFlt) (bs[1] - bs[1]);
9009 tiP[index++] = (CLFlt) (bs[0] - bs[0]);
9010 tiP[index++] = (CLFlt) (bs[1] + bs[0]);
9012 else if (v > TIME_MAX)
9014 /* Fill in stationary matrix */
9015 tiP[index++] = (CLFlt) bs[0];
9016 tiP[index++] = (CLFlt) bs[1];
9017 tiP[index++] = (CLFlt) bs[0];
9018 tiP[index++] = (CLFlt) bs[1];
9022 /* calculate probabilities */
/* same four-entry pattern as the identity case above, with the second term scaled by eV */
9024 tiP[index++] = (CLFlt) (bs[0] + bs[1] * eV);
9025 tiP[index++] = (CLFlt) (bs[1] - bs[1] * eV);
9026 tiP[index++] = (CLFlt) (bs[0] - bs[0] * eV);
9027 tiP[index++] = (CLFlt) (bs[1] + bs[0] * eV);
9035 /*-----------------------------------------------------------------
9037 | TiProbs_Std: update transition probabilities for
9038 | variable states model with or without rate variation
9040 ------------------------------------------------------------------*/
/* Parameters:
     p        - node; p->index selects both the tiProbs buffer and the branch-rate entry
     division - index into modelSettings
     chain    - chain index, also used to look up state[chain]
   Order in which matrices are appended to tiP, as read from the code below:
     - equal state frequencies (paramId == SYMPI_EQUAL): unordered characters with
       2..10 states (flag m->isTiNeeded[nStates-2]), then ordered characters with
       3, 4, 5 and 6 states (flags m->isTiNeeded[9] .. m->isTiNeeded[12]);
     - unequal state frequencies: binary characters (flag m->isTiNeeded[0]) for every
       beta category, then the characters listed in m->stateFreq->sympinStates, computed
       from eigenvalues and cijk values.
   Within each case one matrix is written per gamma category.
   Returns an int status code (presumably NO_ERROR, or ERROR when UpDateCijk fails --
   TODO confirm). */
9041 int TiProbs_Std (TreeNode *p, int division, int chain)
9043 int b, c, i, j, k, n, s, nStates, index=0, index2;
9044 MrBFlt v, eV1, eV2, eV3, eV4, eV5, *catRate,
9045 baseRate, theRate, pi, f1, f2, f3, f4, f5, f6, f7, root,
9046 *eigenValues, *cijk, sum, *bs, mu, length;
9047 CLFlt pNoChange, pChange, *tiP;
9049 # if defined (DEBUG_TIPROBS_STD)
9053 m = &modelSettings[division];
9055 /* find transition probabilities */
9056 tiP = m->tiProbs[m->tiProbsIndex[chain][p->index]];
9058 /* get rate multiplier */
9060 baseRate = GetRate (division, chain);
9062 /* get category rates */
9063 if (m->shape == NULL)
9066 catRate = GetParamSubVals (m->shape, chain, state[chain]);
9068 # if defined (DEBUG_TIPROBS_STD)
9069 /* find base frequencies */
9070 bs = GetParamStdStateFreqs (m->stateFreq, chain, state[chain]);
/* take length from the first non-NULL branch-rate parameter (cppEvents, tk02BranchRates,
   igrBranchRates, mixedBrchRates), reading its sub-value at position p->index */
9074 if (m->cppEvents != NULL)
9076 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
9078 else if (m->tk02BranchRates != NULL)
9080 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
9082 else if (m->igrBranchRates != NULL)
9084 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
9086 else if (m->mixedBrchRates != NULL)
9088 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
9093 /* numerical errors will ensue if we allow very large or very small branch lengths, which might
9094 occur in relaxed clock models; an elegant solution would be to substitute the stationary
9095 probs and initial probs but for now we truncate lengths at small or large values TODO */
/* clamp length into [BRLENS_MIN, BRLENS_MAX] */
9096 if (length > BRLENS_MAX)
9097 length = BRLENS_MAX;
9098 else if (length < BRLENS_MIN)
9099 length = BRLENS_MIN;
9101 /* fill in values; this has to be done differently if state freqs are not equal */
9102 if (m->stateFreq->paramId == SYMPI_EQUAL)
9104 /* equal state frequencies */
9105 /* fill in values for unordered characters */
9107 # if defined (DEBUG_TIPROBS_STD)
/* state counts 2..10 map to flags isTiNeeded[0] .. isTiNeeded[8] */
9110 for (nStates=2; nStates<=10; nStates++)
9112 if (m->isTiNeeded[nStates-2] == NO)
9114 for (k=0; k<m->numGammaCats; k++)
9116 /* calculate probabilities */
9117 v = length*catRate[k]*baseRate;
/* closed form: both values share the decay term eV1 */
9118 eV1 = exp(-(nStates / (nStates - 1.0)) * v);
9119 pChange = (CLFlt) ((1.0 / nStates) - ((1.0 / nStates) * eV1));
9120 pNoChange = (CLFlt) ((1.0 / nStates) + ((nStates - 1.0) / nStates) * eV1);
9122 pChange = (CLFlt) 0.0;
9123 for (i=0; i<nStates; i++)
9125 for (j=0; j<nStates; j++)
9128 tiP[index++] = pNoChange;
9130 tiP[index++] = pChange;
9133 # if defined (DEBUG_TIPROBS_STD)
9134 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
9137 # if defined (DEBUG_TIPROBS_STD)
9142 /* fill in values for 3-state ordered character */
9143 if (m->isTiNeeded[9] == YES)
9146 for (k=0; k<m->numGammaCats; k++)
9148 /* calculate probabilities */
9149 v = length * catRate[k] * baseRate;
9150 eV1 = exp (-(3.0 / 4.0) * v);
9151 eV2 = exp (-(9.0 / 4.0) * v);
/* offsets below are row-major positions in the 3x3 matrix (offset = 3*i + j) */
9154 tiP[index] = (CLFlt) ((1.0 / 3.0) + (eV1 / 2.0) + (eV2 / 6.0));
9155 /* pij(0,1) = pij(1,0) */
9156 tiP[index+1] = tiP[index+3] = (CLFlt) ((1.0 / 3.0) - (eV2 / 3.0));
9158 tiP[index+2] = (CLFlt) ((1.0 / 3.0) - (eV1 / 2.0) + (eV2 / 6.0));
9160 tiP[index+4] = (CLFlt) ((1.0 / 3.0) + (2.0 * eV2 / 3.0));
9162 /* fill in mirror part of matrix */
/* remaining entries are copied from already computed ones, with index2 walking backwards */
9166 tiP[index++] = tiP[index2--];
9168 /* make sure no value is negative */
9169 for (i=index-(nStates*nStates); i<index; i++) {
9171 tiP[i] = (CLFlt) 0.0;
9173 # if defined (DEBUG_TIPROBS_STD)
9174 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
9178 # if defined (DEBUG_TIPROBS_STD)
9183 /* 4-state ordered character */
9184 if (m->isTiNeeded[10] == YES)
9192 for (k=0; k<m->numGammaCats; k++)
9194 /* calculate probabilities */
9195 v = length * catRate[k] * baseRate;
9196 eV1 = 1.0 / (exp ((4.0 * v) / 3.0));
9197 eV2 = exp ((2.0 * (root - 2.0) * v) / 3.0) / root;
9198 eV3 = 1.0 / (root * exp ((2.0 * (root + 2.0) * v) / 3.0));
/* offsets below are row-major positions in the 4x4 matrix (offset = 4*i + j) */
9201 tiP[index] = (CLFlt) (pi * (1.0 + eV1 + (f1*eV2) + (f2*eV3)));
9202 /* pij(0,1) = pij(1,0) */
9203 tiP[index+1] = tiP[index+4] = (CLFlt) (pi * (1.0 - eV1 + eV2 - eV3));
9204 /* pij(0,2) = tiP(1,3) */
9205 tiP[index+2] = tiP[index+7] = (CLFlt) (pi * (1.0 - eV1 - eV2 + eV3));
9207 tiP[index+3] = (CLFlt) (pi * (1.0 + eV1 - (f1*eV2) - (f2*eV3)));
9209 tiP[index+5] = (CLFlt) (pi * (1.0 + eV1 + (f2*eV2) + (f1*eV3)));
9211 tiP[index+6] = (CLFlt) (pi * (1.0 + eV1 - (f2*eV2) - (f1*eV3)));
9213 /* fill in mirror part of matrix */
/* remaining entries are copied from already computed ones, with index2 walking backwards */
9217 tiP[index++] = tiP[index2--];
9219 /* make sure no value is negative */
9220 for (i=index-(nStates*nStates); i<index; i++) {
9222 tiP[i] = (CLFlt) 0.0;
9224 # if defined (DEBUG_TIPROBS_STD)
9225 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
9228 # if defined (DEBUG_TIPROBS_STD)
9233 /* 5-state ordered character */
9234 if (m->isTiNeeded[11] == YES)
9249 for (k=0; k<m->numGammaCats; k++)
9251 /* calculate probabilities */
9252 v = length * catRate[k] * baseRate;
9255 eV1 = exp ((root - 3.0) * v);
9256 eV2 = exp (-(root + 3.0) * v);
9257 eV3 = exp ((root - 5.0) * v);
9258 eV4 = exp (-(root + 5.0) * v);
/* offsets below are row-major positions in the 5x5 matrix (offset = 5*i + j);
   offsets 0..12 are computed directly */
9261 tiP[index] = (CLFlt) (pi* (1.0 + (f1*eV3) + (f2*eV1) + (f3*eV2) + (f4*eV4)));
9262 /* pij(0,1) = pij(1,0) */
9263 tiP[index+1] = tiP[index+5] =
9264 (CLFlt) (pi*(1.0 - (eV3/2.0) + (f5*eV1) - (f5*eV2) - (eV4/2.0)));
9265 /* pij(0,2) = pij(2,0) */
9266 tiP[index+2] = tiP[index+10] = (CLFlt) (pi*(1.0 - (f6*eV3) + (f7*eV4)));
9267 /* pij(0,3) = pij(1,4) */
9268 tiP[index+3] = tiP[index+9] =
9269 (CLFlt) (pi*(1.0 - (eV3/2.0) - (f5*eV1) + (f5*eV2) - (eV4/2.0)));
9271 tiP[index+4] = (CLFlt) (pi*(1.0 + (f1*eV3) - (f2*eV1) - (f3*eV2) + (f4*eV4)));
9273 tiP[index+6] = (CLFlt) (pi*(1.0 + (f4*eV3) + (f3*eV1) + (f2*eV2) + (f1*eV4)));
9274 /* pij(1,2) = pij(2,1) */
9275 tiP[index+7] = tiP[index+11] = (CLFlt) (pi*(1.0 + (f7*eV3) - (f6*eV4)));
9277 tiP[index+8] = (CLFlt) (pi*(1.0 + (f4*eV3) - (f3*eV1) - (f2*eV2) + (f1*eV4)));
9279 tiP[index+12] = (CLFlt) (pi*(1.0 + (2.0*eV3) + (2.0*eV4)));
9281 /* fill in mirror part of matrix */
/* the other 12 of the 25 entries are copied from computed ones, with index2 walking backwards */
9284 for (i=0; i<12; i++)
9285 tiP[index++] = tiP[index2--];
9287 /* make sure no value is negative */
9288 for (i=index-(nStates*nStates); i<index; i++) {
9290 tiP[i] = (CLFlt) 0.0;
9292 # if defined (DEBUG_TIPROBS_STD)
9293 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
9296 # if defined (DEBUG_TIPROBS_STD)
9301 /* 6-state ordered character */
9302 if (m->isTiNeeded[12] == YES)
9308 f4 = (3.0 / (2.0 * root));
9314 for (k=0; k<m->numGammaCats; k++)
9316 /* calculate probabilities */
9317 v = length * catRate[k] * baseRate;
9323 eV4 = exp (3.0 * (root - 2.0) * v);
9324 eV5 = exp (-3.0 * (root + 2.0) * v);
/* offsets below are row-major positions in the 6x6 matrix (offset = 6*i + j);
   offsets 0..17 are computed directly */
9327 tiP[index] = (CLFlt) (pi* (1.0 + (0.5*eV1) + eV2 + (1.5*eV3) + (f1*eV4) + (f2*eV5)));
9328 /* pij(0,1) = pij(1,0) */
9329 tiP[index+1] = tiP[index+6] = (CLFlt) (pi*(1.0 - eV1 - eV2 + (f3*eV4) + (f4*eV5)));
9330 /* pij(0,2) = pij(2,0) */
9331 tiP[index+2] = tiP[index+12] =
9332 (CLFlt) (pi*(1.0 + (0.5*eV1) - eV2 - (1.5*eV3) + (0.5*eV4) + (0.5*eV5)));
9333 /* pij(0,3) = pij(2,5) */
9334 tiP[index+3] = tiP[index+17] =
9335 (CLFlt) (pi*(1.0 + (0.5*eV1) + eV2 - (1.5*eV3) - (0.5*eV4) - (0.5*eV5)));
9336 /* pij(0,4) = pij(1,5) */
9337 tiP[index+4] = tiP[index+11] = (CLFlt) (pi*(1.0 - eV1 + eV2 - (f3*eV4) - (f4*eV5)));
9339 tiP[index+5] = (CLFlt) (pi*(1.0 + (0.5*eV1) - eV2 + (1.5*eV3) - (f1*eV4) - (f2*eV5)));
9341 tiP[index+7] = (CLFlt) (pi*(1.0 + (2.0*eV1) + eV2 + eV4 + eV5));
9342 /* pij(1,2) = pij(2,1) */
9343 tiP[index+8] = tiP[index+13] = (CLFlt) (pi*(1.0 - eV1 + eV2 - (f4*eV4) - (f3*eV5)));
9344 /* pij(1,3) = pij(2,4) */
9345 tiP[index+9] = tiP[index+16] = (CLFlt) (pi*(1.0 - eV1 - eV2 + (f4*eV4) + (f3*eV5)));
9347 tiP[index+10] = (CLFlt) (pi*(1.0 + (2.0*eV1) - eV2 - eV4 - eV5));
9349 tiP[index+14] = (CLFlt) (pi*(1.0 + (0.5*eV1) + eV2 + (1.5*eV3) + (f2*eV4) + (f1*eV5)));
9351 tiP[index+15] = (CLFlt) (pi*(1.0 + (0.5*eV1) - eV2 + (1.5*eV3) - (f2*eV4) - (f1*eV5)));
9353 /* fill in mirror part of matrix */
/* the other 18 of the 36 entries are copied from computed ones, with index2 walking backwards */
9356 for (i=0; i<18; i++)
9357 tiP[index++] = tiP[index2--];
9359 /* make sure no value is negative */
9360 for (i=index-(nStates*nStates); i<index; i++) {
9362 tiP[i] = (CLFlt) 0.0;
9364 # if defined (DEBUG_TIPROBS_STD)
9365 PrintTiProbs (tiP+index-(nStates*nStates), bs+index3, nStates);
9368 # if defined (DEBUG_TIPROBS_STD)
9375 /* unequal state frequencies */
9378 /* first fill in for binary characters using beta categories if needed */
9379 if (m->isTiNeeded[0] == YES)
9381 /* find base frequencies */
9382 bs = GetParamStdStateFreqs (m->stateFreq, chain, state[chain]);
9384 /* cycle through beta and gamma cats */
9385 for (b=0; b<m->numBetaCats; b++)
/* mu: scaler for the two-state case, the same expression as in TiProbs_Res */
9387 mu = 1.0 / (2.0 * bs[0] * bs[1]);
9388 for (k=0; k<m->numGammaCats; k++)
9390 /* calculate probabilities */
9391 v = length*catRate[k]*baseRate;
9392 eV1 = exp(- mu * v);
9393 tiP[index++] = (CLFlt) (bs[0] + (bs[1] * eV1));
9394 tiP[index++] = (CLFlt) (bs[1] - (bs[1] * eV1));
9395 tiP[index++] = (CLFlt) (bs[0] - (bs[0] * eV1));
9396 tiP[index++] = (CLFlt) (bs[1] + (bs[0] * eV1));
9398 /* update stationary state frequency pointer */
9403 /* now use general algorithm for the other cases */
9404 if (m->cijkLength > 0)
9406 /* first update cijk if necessary */
/* NOTE(review): the m->cijkLength > 0 test repeats the enclosing condition */
9407 if (m->cijkLength > 0 && m->upDateCijk == YES)
9409 if (UpDateCijk (division, chain) == ERROR)
9413 /* then get first set of eigenvalues */
9414 eigenValues = m->cijks[m->cijkIndex[chain]];
9416 /* and cycle through the relevant characters */
9417 for (c=0; c<m->stateFreq->nSympi; c++)
9419 n = m->stateFreq->sympinStates[c];
9421 /* fill in values */
9422 for (k=0; k<m->numGammaCats; k++)
9424 v = length * baseRate * catRate[k];
/* the cijk values of this character start 2*n entries after its first eigenvalue;
   the pointer is reset here for every gamma category */
9425 cijk = eigenValues + (2 * n);
9433 sum += (*cijk++) * exp(eigenValues[s] * v);
/* negative sums are stored as 0.0 */
9434 tiP[index++] = (CLFlt) ((sum < 0.0) ? 0.0 : sum);
9439 /* update eigenValues pointer */
/* each character occupies 2*n + n*n*n entries in the cijk buffer */
9440 eigenValues += (n * n * n) + (2 * n);
9449 int UpDateCijk (int whichPart, int whichChain)
9451 int c, i, j, k, n, n3, isComplex, sizeOfSingleCijk, cType, numQAllocated;
9452 MrBFlt **q[100], **eigvecs, **inverseEigvecs;
9453 MrBFlt *eigenValues, *eigvalsImag, *cijk;
9454 MrBFlt *bs, *bsBase, *rateOmegaValues=NULL, rA=0.0, rS=0.0, posScaler, *omegaCatFreq=NULL;
9455 complex **Ceigvecs, **CinverseEigvecs;
9458 # if defined (BEAGLE_ENABLED)
9460 double *beagleEigvecs=NULL, *beagleInverseEigvecs=NULL;
9463 /* get a pointer to the model settings for this partition */
9464 m = &modelSettings[whichPart];
9465 assert (m->upDateCijk == YES);
9467 /* we should only go through here if we have cijk information available for the partition */
9468 if (m->cijkLength > 0)
9470 /* flip cijk space */
9471 FlipCijkSpace(m, whichChain);
9473 /* figure out information on either omega values or rate values, if necessary */
9474 if (m->dataType == DNA || m->dataType == RNA)
9476 if (m->nucModelId == NUCMODEL_CODON) /* we have a NY98 model */
9478 rateOmegaValues = GetParamVals(m->omega, whichChain, state[whichChain]);
9479 if (m->numOmegaCats > 1)
9480 omegaCatFreq = GetParamSubVals (m->omega, whichChain, state[whichChain]);
9482 else if (m->nCijkParts > 1 && m->nucModelId == NUCMODEL_4BY4 && m->numModelStates == 8) /* we have a covarion model */
9483 rateOmegaValues = GetParamSubVals (m->shape, whichChain, state[whichChain]); /* with rate variation */
9485 else if (m->dataType == PROTEIN)
9487 if (m->nCijkParts > 1) /* we have a covarion model */
9488 rateOmegaValues = GetParamSubVals (m->shape, whichChain, state[whichChain]); /* with rate variation */
9490 # if defined (BEAGLE_ENABLED)
9491 else if (m->dataType == RESTRICTION){}
9493 else if (m->dataType != STANDARD)
9495 MrBayesPrint ("%s ERROR: Should not be updating cijks!\n", spacer);
9499 if (m->dataType == STANDARD)
9501 /* set pointers and other stuff needed */
9504 eigenValues = m->cijks[m->cijkIndex[whichChain]];
9505 q[0] = AllocateSquareDoubleMatrix (10);
9506 eigvecs = AllocateSquareDoubleMatrix (10);
9507 inverseEigvecs = AllocateSquareDoubleMatrix (10);
9508 Ceigvecs = AllocateSquareComplexMatrix (10);
9509 CinverseEigvecs = AllocateSquareComplexMatrix (10);
9510 bsBase = GetParamStdStateFreqs (m->stateFreq, whichChain, state[whichChain]);
9512 /* cycle over characters needing cijks */
9513 for (c=0; c<p->nSympi; c++)
9515 n = p->sympinStates[c];
9516 bs = bsBase + p->sympiBsIndex[c];
9517 cType = p->sympiCType[c];
9519 eigvalsImag = eigenValues + n;
9520 cijk = eigenValues + (2 * n);
9521 if (SetStdQMatrix (q[0], n, bs, cType) == ERROR)
9523 isComplex = GetEigens (n, q[0], eigenValues, eigvalsImag, eigvecs, inverseEigvecs, Ceigvecs, CinverseEigvecs);
9524 if (isComplex == NO)
9526 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
9530 if (isComplex == YES)
9531 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
9533 MrBayesPrint ("%s ERROR: Computing eigenvalues problem!\n", spacer);
9536 eigenValues += (n3 + (2 * n));
9541 /* all other data types */
9542 numQAllocated = m->nCijkParts;
9543 sizeOfSingleCijk = m->cijkLength / m->nCijkParts;
9544 n = m->numModelStates;
9546 # if defined (BEAGLE_ENABLED)
9547 if (m->useBeagle == YES)
9548 eigenValues = m->cijks[m->cijkIndex[whichChain]/m->nCijkParts];
9550 eigenValues = m->cijks[m->cijkIndex[whichChain]];
9552 eigenValues = m->cijks[m->cijkIndex[whichChain]];
9554 eigvalsImag = eigenValues + n;
9555 cijk = eigenValues + (2 * n);
9556 for (k=0; k<numQAllocated; k++)
9557 q[k] = AllocateSquareDoubleMatrix (n);
9558 eigvecs = AllocateSquareDoubleMatrix (n);
9559 inverseEigvecs = AllocateSquareDoubleMatrix (n);
9560 Ceigvecs = AllocateSquareComplexMatrix (n);
9561 CinverseEigvecs = AllocateSquareComplexMatrix (n);
9562 bs = GetParamSubVals (m->stateFreq, whichChain, state[whichChain]);
9564 if (m->nCijkParts == 1)
9566 if (m->dataType == DNA || m->dataType == RNA)
9568 if (m->nucModelId == NUCMODEL_CODON)
9570 if (SetNucQMatrix (q[0], n, whichChain, whichPart, rateOmegaValues[0], &rA, &rS) == ERROR)
9575 if (SetNucQMatrix (q[0], n, whichChain, whichPart, 1.0, &rA, &rS) == ERROR)
9579 # if defined (BEAGLE_ENABLED)
9580 else if (m->dataType == RESTRICTION)
9582 SetBinaryQMatrix (q[0], whichChain, whichPart);
9587 if (SetProteinQMatrix (q[0], n, whichChain, whichPart, 1.0) == ERROR)
9590 isComplex = GetEigens (n, q[0], eigenValues, eigvalsImag, eigvecs, inverseEigvecs, Ceigvecs, CinverseEigvecs);
9591 # if defined (BEAGLE_ENABLED)
9592 if (isComplex == YES)
9594 if (isComplex == YES)
9595 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
9597 MrBayesPrint ("%s ERROR: Computing eigenvalues problem!\n", spacer);
9600 if (m->useBeagle == YES)
9602 /* TODO: only allocate this space once at initialization */
9603 beagleEigvecs = (double*) SafeCalloc (2*n*n, sizeof(double));
9604 beagleInverseEigvecs = beagleEigvecs + n*n;
9605 for (i=k=0; i<n; i++)
9607 // eigenValues[i] = 0.1;
9610 beagleEigvecs[k] = eigvecs[i][j];
9611 beagleInverseEigvecs[k] = inverseEigvecs[i][j];
9615 beagleSetEigenDecomposition(m->beagleInstance,
9616 m->cijkIndex[whichChain],
9618 beagleInverseEigvecs,
9620 free(beagleEigvecs);
9624 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
9627 if (isComplex == NO)
9629 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
9633 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
9640 /* Here, we calculate the rate matrices (Q) for various nucleotide and amino acid
9641 data models. Usually, when the rate matrix is set in SetNucQMatrix, it is scaled
9642 such that the average substitution rate is one. However, there is a complication
9643 for positive selection models using codon rate matrices. First, we have more than
9644 one matrix; in fact, we have as many rate matrices as there are omega values. Second,
9645 the mean substitution rate still has to be one. And third, we want the synonymous
9646 rate to be the same across the rate matrices. For positive selection models, the Q
9647 matrix comes out of SetNucQMatrix unscaled. Once we have all m->nCijkParts rate
9648 matrices, we then scale again, this time to ensure that the mean substitution rate is one. */
9650 /* First, calculate rate matrices for each category: */
9652 for (k=0; k<m->nCijkParts; k++)
9654 if (m->dataType == DNA || m->dataType == RNA)
9656 if (SetNucQMatrix (q[k], n, whichChain, whichPart, rateOmegaValues[k], &rA, &rS) == ERROR)
9661 if (SetProteinQMatrix (q[k], n, whichChain, whichPart, rateOmegaValues[k]) == ERROR)
9664 if (m->nucModelId == NUCMODEL_CODON && m->numOmegaCats > 1)
9665 posScaler += omegaCatFreq[k] * (rS + rA);
9668 /* Then rescale the rate matrices, if this is a positive selection model: */
9669 if (m->nucModelId == NUCMODEL_CODON && m->numOmegaCats > 1)
9671 posScaler = 1.0 / posScaler;
9672 for (k=0; k<m->nCijkParts; k++)
9676 q[k][i][j] *= posScaler;
9680 /* Finally, calculate eigenvalues, etc.: */
9681 # if defined (BEAGLE_ENABLED)
9682 if (m->useBeagle == YES)
9684 /* TODO: only allocate this space once at initialization */
9685 beagleEigvecs = (double*) SafeCalloc (2*n*n, sizeof(double));
9686 beagleInverseEigvecs = beagleEigvecs + n*n;
9689 for (k=0; k<m->nCijkParts; k++)
9691 isComplex = GetEigens (n, q[k], eigenValues, eigvalsImag, eigvecs, inverseEigvecs, Ceigvecs, CinverseEigvecs);
9692 # if defined (BEAGLE_ENABLED)
9693 if (isComplex == YES)
9695 if (isComplex == YES)
9696 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
9698 MrBayesPrint ("%s ERROR: Computing eigenvalues problem!\n", spacer);
9701 if (m->useBeagle == YES)
9703 for (i=u=0; i<n; i++)
9707 beagleEigvecs[u] = eigvecs[i][j];
9708 beagleInverseEigvecs[u] = inverseEigvecs[i][j];
9713 beagleSetEigenDecomposition(m->beagleInstance,
9714 m->cijkIndex[whichChain] + k,
9716 beagleInverseEigvecs,
9721 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
9724 if (isComplex == NO)
9726 CalcCijk (n, cijk, eigvecs, inverseEigvecs);
9730 MrBayesPrint ("%s ERROR: Complex eigenvalues found!\n", spacer);
9734 /* shift pointers */
9735 eigenValues += sizeOfSingleCijk;
9736 eigvalsImag += sizeOfSingleCijk;
9737 cijk += sizeOfSingleCijk;
9739 # if defined (BEAGLE_ENABLED)
9740 free(beagleEigvecs);
9745 for (k=0; k<numQAllocated; k++)
9746 FreeSquareDoubleMatrix (q[k]);
9747 FreeSquareDoubleMatrix (eigvecs);
9748 FreeSquareDoubleMatrix (inverseEigvecs);
9749 FreeSquareComplexMatrix (Ceigvecs);
9750 FreeSquareComplexMatrix (CinverseEigvecs);
9756 for (k=0; k<numQAllocated; k++)
9757 FreeSquareDoubleMatrix (q[k]);
9758 FreeSquareDoubleMatrix (eigvecs);
9759 FreeSquareDoubleMatrix (inverseEigvecs);
9760 FreeSquareComplexMatrix (Ceigvecs);
9761 FreeSquareComplexMatrix (CinverseEigvecs);