]> git.donarmstrong.com Git - mrbayes.git/blob - src/mbbeagle.c
import mrbayes
[mrbayes.git] / src / mbbeagle.c
1 /*
2  *  MrBayes 3
3  *
4  *  (c) 2002-2013
5  *
6  *  This file originally contributed by:
7  *
8  *  Marc A. Suchard
9  *  Department of Biomathematics
10  *  University of California, Los Angeles
11  *  Los Angeles, CA 90095
12  *  msuchard@ucla.edu
13  *
14  *  With important contributions by
15  *
16  *  Maxim Teslenko (maxim.teslenko@nrm.se)
17  *
18  *  and by many users (run 'acknowledgments' to see more info)
19  *
20  * This program is free software; you can redistribute it and/or
21  * modify it under the terms of the GNU General Public License
22  * as published by the Free Software Foundation; either version 2
23  * of the License, or (at your option) any later version.
24  *
25  * This program is distributed in the hope that it will be useful,
26  * but WITHOUT ANY WARRANTY; without even the implied warranty of
27  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28  * GNU General Public License for more details (www.gnu.org).
29  *
30  */
31
32 #include "bayes.h"
33 #include "mbbeagle.h"
34 #include "mcmc.h"
35 #include "model.h"
36 #include "utils.h"
37
38 const char* const svnRevisionMbbeagleC = "$Rev: 1008 $";   /* Revision keyword which is expended/updated by svn on each commit/update */
39
40 /* Functions and variables defined in mcmc.c that are not exported in mcmc.h */
41 void    LaunchLogLikeForDivision(int chain, int d, MrBFlt* lnL);
42
43 void    FlipCondLikeSpace (ModelInfo *m, int chain, int nodeIndex);
44 void    FlipNodeScalerSpace (ModelInfo *m, int chain, int nodeIndex);
45 void    FlipSiteScalerSpace (ModelInfo *m, int chain);
46 void    FlipTiProbsSpace (ModelInfo *m, int chain, int nodeIndex);
47 void    ResetSiteScalers (ModelInfo *m, int chain);
48 void    CopySiteScalers (ModelInfo *m, int chain);
49
50 int     TreeCondLikes_Beagle_No_Rescale (Tree *t, int division, int chain);
51 int     TreeCondLikes_Beagle_Rescale_All (Tree *t, int division, int chain);
52
53 extern int *chainId;
54 extern int numLocalChains;
55
56
57 #if defined (BEAGLE_ENABLED)
58 /*------------------------------------------------------------------------
59 |
60 |   InitBeagleInstance: create and initialize a beagle instance
61 |
62 -------------------------------------------------------------------------*/
63 int InitBeagleInstance (ModelInfo *m, int division)
64 {
65     int                     i, j, k, c, s, *inStates, numPartAmbigTips;
66     double                  *inPartials;
67     BitsLong                *charBits;
68     BeagleInstanceDetails   details;
69     long preferedFlags, requiredFlags;
70     int resource;
71     
72     if (m->useBeagle == NO)
73         return ERROR;
74     
75     /* at least one eigen buffer needed */
76     if (m->nCijkParts == 0)
77         m->nCijkParts = 1;
78
79     /* allocate memory used by beagle */
80     m->logLikelihoods          = (MrBFlt *) SafeCalloc ((numLocalChains)*m->numChars, sizeof(MrBFlt));
81     m->inRates                 = (MrBFlt *) SafeCalloc (m->numGammaCats, sizeof(MrBFlt));
82     m->branchLengths           = (MrBFlt *) SafeCalloc (2*numLocalTaxa, sizeof(MrBFlt));
83     m->tiProbIndices           = (int *) SafeCalloc (2*numLocalTaxa, sizeof(int));
84     m->inWeights               = (MrBFlt *) SafeCalloc (m->numGammaCats*m->nCijkParts, sizeof(MrBFlt));
85     m->bufferIndices           = (int *) SafeCalloc (m->nCijkParts, sizeof(int));
86     m->eigenIndices            = (int *) SafeCalloc (m->nCijkParts, sizeof(int));
87     m->childBufferIndices      = (int *) SafeCalloc (m->nCijkParts, sizeof(int));
88     m->childTiProbIndices      = (int *) SafeCalloc (m->nCijkParts, sizeof(int));
89     m->cumulativeScaleIndices  = (int *) SafeCalloc (m->nCijkParts, sizeof(int));
90
91     numPartAmbigTips = 0;
92     if (m->numStates != m->numModelStates)
93         numPartAmbigTips = numLocalTaxa;
94     else
95         {
96         for (i=0; i<numLocalTaxa; i++)
97             {
98             if (m->isPartAmbig[i] == YES)
99                 numPartAmbigTips++;
100             }
101         }
102
103     if (beagleResourceNumber >= 0 && beagleResourceNumber != 99)
104         {
105         resource = beagleResourceNumber;
106         beagleResourceCount = 1;
107         }
108     else if (beagleResourceCount != 0) 
109         {
110         resource = beagleResource[beagleInstanceCount % beagleResourceCount];
111         }
112     preferedFlags = beagleFlags;
113     
114     requiredFlags = 0L;
115     
116     if (beagleScalingScheme == MB_BEAGLE_SCALE_ALWAYS)
117         requiredFlags |= BEAGLE_FLAG_SCALERS_LOG; //BEAGLE_FLAG_SCALERS_RAW;
118
119     /* TODO: allocate fewer buffers when nCijkParts > 1 */
120     /* create beagle instance */
121     m->beagleInstance = beagleCreateInstance(numLocalTaxa,
122                                              m->numCondLikes * m->nCijkParts,
123                                              numLocalTaxa - numPartAmbigTips,
124                                              m->numModelStates,
125                                              m->numChars,
126                                             (numLocalChains + 1) * m->nCijkParts,
127                                              m->numTiProbs*m->nCijkParts,
128                                              m->numGammaCats,
129                                              m->numScalers * m->nCijkParts,
130                                              (beagleResourceCount == 0 ? NULL : &resource),
131                                              (beagleResourceCount == 0 ? 0 : 1),                                             
132                                              preferedFlags,
133                                              requiredFlags,
134                                              &details);
135
136     if (m->beagleInstance < 0)
137         {
138         MrBayesPrint ("%s   Failed to start BEAGLE instance\n", spacer);
139         return (ERROR);
140         }
141     else
142         {
143         MrBayesPrint ("\n%s   Using BEAGLE resource %i for division %d:", spacer, details.resourceNumber, division+1);
144 #   if defined (THREADS_ENABLED)
145         MrBayesPrint (" (%s)\n", (tryToUseThreads ? "threaded" : "non-threaded"));
146 #   else
147         MrBayesPrint (" (non-threaded)\n");
148 #   endif
149         MrBayesPrint ("%s      Rsrc Name : %s\n", spacer, details.resourceName);
150         MrBayesPrint ("%s      Impl Name : %s\n", spacer, details.implName);
151         MrBayesPrint ("%s      Flags:", spacer);
152         BeaglePrintFlags(details.flags);
153         MrBayesPrint ("\n");
154         beagleInstanceCount++;          
155         }
156
157     /* initialize tip data */
158     inStates = (int *) SafeMalloc (m->numChars * sizeof(int));
159     if (!inStates)
160         return ERROR;
161     inPartials = (double *) SafeMalloc (m->numChars * m->numModelStates * sizeof(double));
162     if (!inPartials)
163         return ERROR;
164     for (i=0; i<numLocalTaxa; i++)
165         {
166         if (m->isPartAmbig[i] == NO)
167             {
168             charBits = m->parsSets[i];
169             for (c=0; c<m->numChars; c++)
170                 {
171                 for (s=j=0; s<m->numModelStates; s++)
172                     {
173                     if (IsBitSet(s, charBits))
174                         {
175                         inStates[c] = s;
176                         j++;
177                         }
178                     }
179                 if (j == m->numModelStates)
180                     inStates[c] = j;
181                 else
182                     assert (j==1);
183                 charBits += m->nParsIntsPerSite;
184                 }
185             beagleSetTipStates(m->beagleInstance, i, inStates);
186             }
187         else /* if (m->isPartAmbig == YES) */
188             {
189             k = 0;
190             charBits = m->parsSets[i];
191             for (c=0; c<m->numChars; c++)
192                 {
193                 for (s=0; s<m->numModelStates; s++)
194                     {
195                     if (IsBitSet(s%m->numStates, charBits))
196                         inPartials[k++] = 1.0;
197                     else
198                         inPartials[k++] = 0.0;
199                     }
200                 charBits += m->nParsIntsPerSite;
201                 }
202             beagleSetTipPartials(m->beagleInstance, i, inPartials);
203             }
204         }
205     free (inStates);
206     free (inPartials);
207
208     return NO_ERROR;
209 }
210
211
212 /*-----------------------------------------------------------------
213 |
214 |   LaunchBEAGLELogLikeForDivision: calculate the log likelihood  
215 |       of the new state of the chain for a single division
216 |
217 -----------------------------------------------------------------*/
218 void LaunchBEAGLELogLikeForDivision(int chain, int d, ModelInfo* m, Tree* tree, MrBFlt* lnL)
219 {
220     int i, rescaleFreqNew;
221     int *isScalerNode;
222     TreeNode *p;
223     
224     if (beagleScalingScheme == MB_BEAGLE_SCALE_ALWAYS) 
225         {
226     
227 #   if defined (DEBUG_MB_BEAGLE_FLOW)
228         MrBayesPrint ("ALWAYS RESCALING\n");
229 #   endif
230         /* Flip and copy or reset site scalers */
231         FlipSiteScalerSpace(m, chain);
232         if (m->upDateAll == YES) {
233             for (i=0; i<m->nCijkParts; i++) {           
234                 beagleResetScaleFactors(m->beagleInstance, m->siteScalerIndex[chain] + i);
235                 }
236             }
237         else
238             CopySiteScalers(m, chain);
239
240         TreeTiProbs_Beagle(tree, d, chain);
241         TreeCondLikes_Beagle(tree, d, chain);
242         TreeLikelihood_Beagle(tree, d, chain, lnL, (chainId[chain] % chainParams.numChains));
243         } 
244     else 
245         { /* MB_BEAGLE_SCALE_DYNAMIC */
246     
247         /* This flag is only valid within this block */
248         m->rescaleBeagleAll = NO;        
249         TreeTiProbs_Beagle(tree, d, chain);
250         if (m->succesCount[chain] > 1000)
251             {
252             m->succesCount[chain] = 10;
253             m->rescaleFreq[chain]++; /* increase rescaleFreq independent of whether we accept or reject new state */
254             m->rescaleFreqOld = rescaleFreqNew = m->rescaleFreq[chain];
255             for (i=0; i<tree->nIntNodes; i++)
256                 {
257                 p = tree->intDownPass[i];
258                 if (p->upDateCl == YES) {
259                      /* flip to the new workspace since TreeCondLikes_Beagle_Rescale_All() does not do it for
260                         (p->upDateCl == YES) since it assumes that TreeCondLikes_Beagle_No_Rescale() did it */
261                     FlipCondLikeSpace (m, chain, p->index);
262                    }
263                 }
264             goto rescale_all;
265             }
266
267         if (beagleScalingFrequency != 0 && 
268             m->beagleComputeCount[chain] % (beagleScalingFrequency) == 0)
269             {
270             m->rescaleFreqOld = rescaleFreqNew = m->rescaleFreq[chain];
271             for (i=0; i<tree->nIntNodes; i++)
272                 {
273                 p = tree->intDownPass[i];
274                 if (p->upDateCl == YES) {
275                      /* flip to the new workspace since TreeCondLikes_Beagle_Rescale_All() does not do it for
276                         (p->upDateCl == YES) since it assumes that TreeCondLikes_Beagle_No_Rescale() did it */
277                     FlipCondLikeSpace (m, chain, p->index);
278                    }
279                 }
280             goto rescale_all;
281             }
282
283         TreeCondLikes_Beagle_No_Rescale(tree, d, chain);
284
285         /* Check if likelihood is valid */      
286         if (TreeLikelihood_Beagle(tree, d, chain, lnL, (chainId[chain] % chainParams.numChains)) == BEAGLE_ERROR_FLOATING_POINT) 
287             {
288             m->rescaleFreqOld = rescaleFreqNew = m->rescaleFreq[chain];
289             if (rescaleFreqNew > 1 && m->succesCount[chain] < 40)
290                 {
291                 if (m->succesCount[chain] < 10)
292                     {
293                     if (m->succesCount[chain] < 4)
294                         {
295                         rescaleFreqNew -= rescaleFreqNew >> 3; /* <== we cut up to 12,5% of rescaleFreq */
296                         if (m->succesCount[chain] < 2)
297                             {
298                             rescaleFreqNew -= rescaleFreqNew >> 3;
299                             /* to avoid situation when we may stack at high rescaleFreq when new states do not get accepted because of low liklihood but there proposed frequency is high we reduce rescaleFreq even if we reject the last move*/
300                             /* basically the higher probability of proposing of low liklihood state which needs smaller rescaleFreq would lead to higher probability of hitting this code which should reduce rescaleFreqOld thus reduce further probability of hitting this code */
301                             /* at some point this negative feedback mechanism should get in balance with the mechanism of periodically increasing rescaleFreq when long sequence of successes is achieved*/
302                             m->rescaleFreqOld -= m->rescaleFreqOld >> 3;
303                             }
304                         m->rescaleFreqOld -= m->rescaleFreqOld >> 3;
305                         m->rescaleFreqOld--;
306                         m->rescaleFreqOld = (m->rescaleFreqOld ? m->rescaleFreqOld:1);
307                         m->recalculateScalers = YES; 
308                         recalcScalers = YES;
309                         }
310                     }
311                 rescaleFreqNew--;
312                 rescaleFreqNew = (rescaleFreqNew ? rescaleFreqNew : 1);
313                 }
314             m->succesCount[chain] = 0;
315     rescale_all:
316 #   if defined (DEBUG_MB_BEAGLE_FLOW)
317             MrBayesPrint ("NUMERICAL RESCALING\n");
318 #   endif
319
320             m->rescaleBeagleAll = YES;
321             FlipSiteScalerSpace(m, chain);
322             isScalerNode = m->isScalerNode[chain];
323     while_loop:
324             ResetScalersPartition (isScalerNode, tree, rescaleFreqNew);
325             for (i=0; i<m->nCijkParts; i++) 
326                 {           
327                 beagleResetScaleFactors(m->beagleInstance, m->siteScalerIndex[chain] + i);
328                 }
329             
330             TreeCondLikes_Beagle_Rescale_All (tree, d, chain);
331             if (TreeLikelihood_Beagle(tree, d, chain, lnL, (chainId[chain] % chainParams.numChains)) == BEAGLE_ERROR_FLOATING_POINT)
332                 {
333                 if (rescaleFreqNew > 1)
334                     {
335                     /* Swap back scalers which were swapped in TreeCondLikes_Beagle_Rescale_All() */
336                     for (i=0; i<tree->nIntNodes; i++)
337                         {
338                         p = tree->intDownPass[i];
339                         if (isScalerNode[p->index] == YES)
340                             FlipNodeScalerSpace (m, chain, p->index);
341                         }
342                     rescaleFreqNew -= rescaleFreqNew >> 3; /* <== we cut up to 12,5% of rescaleFreq */
343                     rescaleFreqNew--;                      /* we cut extra 1 of rescaleFreq */
344                     goto while_loop;
345                     }
346                 }
347             m->rescaleFreq[chain] = rescaleFreqNew;
348             }
349         }
350     
351     /* Count number of evaluations */
352     m->beagleComputeCount[chain]++;
353 }
354
355
356 void recalculateScalers(int chain)
357 {
358     int         i, d, rescaleFreqNew;
359     int         *isScalerNode;
360     ModelInfo*  m;
361     Tree        *tree;
362
363     for (d=0; d<numCurrentDivisions; d++)
364         {
365         m = &modelSettings[d];
366         if (m->recalculateScalers == YES)
367             {
368             m->recalculateScalers = NO;
369             tree = GetTree(m->brlens, chain, state[chain]);
370
371             rescaleFreqNew = m->rescaleFreq[chain];
372             isScalerNode = m->isScalerNode[chain];
373
374             ResetScalersPartition (isScalerNode, tree, rescaleFreqNew);
375             for (i=0; i<m->nCijkParts; i++) {           
376                 beagleResetScaleFactors(m->beagleInstance, m->siteScalerIndex[chain] + i);
377             }
378             /* here it does not matter if we flip CL space or not */
379             TreeCondLikes_Beagle_Rescale_All (tree, d, chain);
380             }
381         }
382 }
383
384
385 void BeagleAddGPUDevicesToList(int **newResourceList, int *beagleResourceCount)
386 {
387     BeagleResourceList* beagleResources;
388     int i, gpuCount;
389     
390     beagleResources = beagleGetResourceList();
391     if (*newResourceList == NULL) {
392         *newResourceList = (int*) SafeCalloc(sizeof(int), beagleResources->length);
393     }
394     gpuCount = 0;
395     for (i = 0; i < beagleResources->length; i++) {
396         if (beagleResources->list[i].supportFlags & BEAGLE_FLAG_PROCESSOR_GPU) {
397             (*newResourceList)[gpuCount] = i;
398             gpuCount++;
399         }
400     }
401     *beagleResourceCount = gpuCount;            
402 }
403
404
405 void BeagleRemoveGPUDevicesFromList(int **beagleResource, int *beagleResourceCount)
406 {
407     *beagleResourceCount = 0;
408 }
409
410
411 /*-----
412 |
413 | BeaglePrintResources: outputs the available BEAGLE resources
414 |
415 ----------*/
416 void BeaglePrintResources()
417 {
418     int i;
419     BeagleResourceList* beagleResources;
420     
421     beagleResources = beagleGetResourceList();
422     MrBayesPrint ("%s   Available resources reported by beagle library:\n", spacer);
423     for (i=0; i<beagleResources->length; i++) 
424         {
425         MrBayesPrint ("\tResource %i:\n", i);       
426         MrBayesPrint ("\tName: %s\n", beagleResources->list[i].name);
427         if (i > 0) 
428             {
429             MrBayesPrint ("\tDesc: %s\n", beagleResources->list[i].description);
430             }
431         MrBayesPrint ("\tFlags:");
432         BeaglePrintFlags(beagleResources->list[i].supportFlags);
433         MrBayesPrint ("\n\n");
434         }
435     MrBayesPrint ("%s   BEAGLE vesion: %s\n", spacer, beagleGetVersion());
436 }
437
438
439 int BeagleCheckFlagCompatability(long inFlags)
440 {
441     if (inFlags & BEAGLE_FLAG_PROCESSOR_GPU) {
442         if (inFlags & BEAGLE_FLAG_VECTOR_SSE) {
443             MrBayesPrint ("%s   Simultaneous use of GPU and SSE not available.\n", spacer);
444             return NO;
445         }
446         if (inFlags & BEAGLE_FLAG_THREADING_OPENMP) {
447             MrBayesPrint ("%s   Simultaneous use of GPU and OpenMP not available.\n", spacer);
448             return NO;
449         }
450     }
451
452     return YES;
453 }
454
455
456 /*-------------------
457 |
458 |  BeaglePrintFlags: outputs beagle instance details
459 |
460 ______________________*/
461 void BeaglePrintFlags(long inFlags) 
462 {
463     int     i, k;
464     char *names[] = { "PROCESSOR_CPU",
465                       "PROCESSOR_GPU",
466                       "PROCESSOR_FPGA",
467                       "PROCESSOR_CELL",
468                       "PRECISION_DOUBLE",
469                       "PRECISION_SINGLE",
470                       "COMPUTATION_ASYNCH",
471                       "COMPUTATION_SYNCH",
472                       "EIGEN_REAL",
473                       "EIGEN_COMPLEX",
474                       "SCALING_MANUAL",
475                       "SCALING_AUTO",
476                       "SCALING_ALWAYS",
477                       "SCALING_DYNAMIC",
478                       "SCALERS_RAW",
479                       "SCALERS_LOG",
480                       "VECTOR_NONE",
481                       "VECTOR_SSE",
482                       "THREADING_NONE",
483                       "THREADING_OPENMP"
484                     };
485     long flags[] = { BEAGLE_FLAG_PROCESSOR_CPU,
486                      BEAGLE_FLAG_PROCESSOR_GPU,
487                      BEAGLE_FLAG_PROCESSOR_FPGA,
488                      BEAGLE_FLAG_PROCESSOR_CELL,
489                      BEAGLE_FLAG_PRECISION_DOUBLE,
490                      BEAGLE_FLAG_PRECISION_SINGLE,
491                      BEAGLE_FLAG_COMPUTATION_ASYNCH,
492                      BEAGLE_FLAG_COMPUTATION_SYNCH,
493                      BEAGLE_FLAG_EIGEN_REAL,
494                      BEAGLE_FLAG_EIGEN_COMPLEX,
495                      BEAGLE_FLAG_SCALING_MANUAL,
496                      BEAGLE_FLAG_SCALING_AUTO,
497                      BEAGLE_FLAG_SCALING_ALWAYS,
498                      BEAGLE_FLAG_SCALING_DYNAMIC,
499                      BEAGLE_FLAG_SCALERS_RAW,
500                      BEAGLE_FLAG_SCALERS_LOG,
501                      BEAGLE_FLAG_VECTOR_NONE,
502                      BEAGLE_FLAG_VECTOR_SSE,
503                      BEAGLE_FLAG_THREADING_NONE,
504                      BEAGLE_FLAG_THREADING_OPENMP
505                     };
506
507     for (i=k=0; i<20; i++)
508         {
509         if (inFlags & flags[i])
510             {
511             if (k%4 == 0 && k > 0)
512                 MrBayesPrint ("\n%s            ", spacer);
513             MrBayesPrint (" %s", names[i]);
514             k++;
515             }
516         }
517 }
518
519
520 #if defined(THREADS_ENABLED)
521 void *LaunchThreadLogLikeForDivision(void *arguments)
522 {
523     int d, chain;
524     MrBFlt *lnL;
525     LaunchStruct* launchStruct;
526     
527     launchStruct = (LaunchStruct*) arguments;
528     chain = launchStruct->chain;
529     d = launchStruct->division;
530     lnL = launchStruct->lnL;
531     LaunchLogLikeForDivision(chain, d, lnL);    
532     return 0;
533 }
534
535
536 MrBFlt LaunchLogLikeForAllDivisionsInParallel(int chain)
537 {
538     int d;
539     int threadError;
540     pthread_t* threads;
541     LaunchStruct* launchValues;
542     int* wait;
543     ModelInfo* m;
544     MrBFlt chainLnLike;
545     
546     chainLnLike = 0.0;
547
548     /* TODO Initialize only once */
549     threads = (pthread_t*) SafeMalloc(sizeof(pthread_t) * numCurrentDivisions);
550     launchValues = (LaunchStruct*) SafeMalloc(sizeof(LaunchStruct) * numCurrentDivisions);
551     wait = (int*) SafeMalloc(sizeof(int) * numCurrentDivisions);
552     
553     /* Cycle through divisions and recalculate tis and cond likes as necessary. */
554     /* Code below does not try to avoid recalculating ti probs for divisions    */
555     /* that could share ti probs with other divisions.                          */
556     for (d=0; d<numCurrentDivisions; d++)
557         {
558         
559 #   if defined (BEST_MPI_ENABLED)
560         if (isDivisionActive[d] == NO)
561             continue;
562 #   endif
563         m = &modelSettings[d];
564         
565         if (m->upDateCl == YES) 
566             {                   
567             launchValues[d].chain = chain;
568             launchValues[d].division = d;
569             launchValues[d].lnL = &(m->lnLike[2*chain + state[chain]]);
570             /* Fork */
571             threadError = pthread_create(&threads[d], NULL, 
572                                          LaunchThreadLogLikeForDivision, 
573                                          (void*) &launchValues[d]);
574             assert (0 == threadError);
575             wait[d] = 1;                    
576             }           
577         else 
578             {
579             wait[d] = 0;
580             }
581         }
582     
583     for (d = 0; d < numCurrentDivisions; d++)
584         {
585         /* Join */
586         if (wait[d]) 
587             {
588             threadError = pthread_join(threads[d], NULL);
589             assert (0 == threadError);
590             }               
591         m = &modelSettings[d];
592         chainLnLike += m->lnLike[2*chain + state[chain]];
593         }
594             
595     /* TODO Free these once */
596     free(threads);
597     free(launchValues);
598     free(wait);
599     
600     return chainLnLike;
601 }
602 #endif
603
604
605 int ScheduleLogLikeForAllDivisions()
606 {
607     int d;
608     int divisionsToLaunch = 0;
609     ModelInfo       *m;
610     
611     if (numCurrentDivisions < 2) {
612         return 0;
613         }
614     
615     for (d=0; d<numCurrentDivisions; d++) {
616         m = &modelSettings[d];
617         if (m->upDateCl == YES) {
618             divisionsToLaunch++;
619             }
620         }
621     
622     return (divisionsToLaunch > 1);
623 }
624
625
626 /*----------------------------------------------------------------
627  |
628  |  TreeCondLikes_Beagle: This routine updates all conditional
629  |       (partial) likelihoods of a beagle instance while doing no rescaling.
630  |      That potentialy can make final liklihood bad then calculation with rescaling needs to be done.
631  |
632  -----------------------------------------------------------------*/
633 int TreeCondLikes_Beagle_No_Rescale (Tree *t, int division, int chain)
634 {
635     int                 i, j, cumulativeScaleIndex;
636     BeagleOperation     operations;
637     TreeNode            *p;
638     ModelInfo           *m;
639     unsigned            chil1Step, chil2Step;
640     int                 *isScalerNode;
641     
642     m = &modelSettings[division];
643     isScalerNode = m->isScalerNode[chain];
644     
645     for (i=0; i<t->nIntNodes; i++)
646     {
647         p = t->intDownPass[i];
648         
649         /* check if conditional likelihoods need updating */
650         if (p->upDateCl == YES)
651         {
652             /* flip to the new workspace */
653             FlipCondLikeSpace (m, chain, p->index);
654             
655             /* update conditional likelihoods */
656             operations.destinationPartials    = m->condLikeIndex[chain][p->index       ];
657             operations.child1Partials         = m->condLikeIndex[chain][p->left->index ];
658             operations.child1TransitionMatrix = m->tiProbsIndex [chain][p->left->index ];
659             operations.child2Partials         = m->condLikeIndex[chain][p->right->index];
660             operations.child2TransitionMatrix = m->tiProbsIndex [chain][p->right->index];
661             
662             /* All partials for tips are the same across omega categories, thus we are doing the following two if statments.*/
663             if (p->left->left== NULL)
664                 chil1Step=0;
665             else
666                 chil1Step=1;
667             
668             if (p->right->left== NULL)
669                 chil2Step=0;
670             else
671                 chil2Step=1;
672             
673             operations.destinationScaleWrite = BEAGLE_OP_NONE;
674             cumulativeScaleIndex  = BEAGLE_OP_NONE;
675             if (isScalerNode[p->index] == YES)
676                 {
677                 operations.destinationScaleRead  = m->nodeScalerIndex[chain][p->index];
678                 }
679             else
680                 {
681                 operations.destinationScaleRead  = BEAGLE_OP_NONE;
682                 }
683             
684             for (j=0; j<m->nCijkParts; j++)
685             {
686                 beagleUpdatePartials(m->beagleInstance,
687                                      &operations,
688                                      1,
689                                      cumulativeScaleIndex);
690                 
691                 operations.destinationPartials++;
692                 operations.child1Partials+=chil1Step;
693                 operations.child1TransitionMatrix++;
694                 operations.child2Partials+=chil2Step;
695                 operations.child2TransitionMatrix++;
696                 
697                 if (isScalerNode[p->index] == YES)
698                     operations.destinationScaleRead++;
699             }
700         }
701     }
702     
703     return NO_ERROR;
704 }
705
706
707 /*----------------------------------------------------------------
708  |
709  |  TreeCondLikes_Beagle: This routine updates all conditional
710  |       (partial) likelihoods of a beagle instance while rescaling at every node.
711  |        Note: all nodes get recalculated, not only tached by move.
712  |
713  -----------------------------------------------------------------*/
714 int TreeCondLikes_Beagle_Rescale_All (Tree *t, int division, int chain)
715 {
716     int                 i, j, cumulativeScaleIndex;
717     BeagleOperation     operations;
718     TreeNode            *p;
719     ModelInfo           *m;
720     unsigned            chil1Step, chil2Step;
721     int                 *isScalerNode;
722     
723     m = &modelSettings[division];
724     isScalerNode = m->isScalerNode[chain];
725     
726     for (i=0; i<t->nIntNodes; i++)
727     {
728         p = t->intDownPass[i];
729         
730         if (p->upDateCl == NO) {
731             //p->upDateCl = YES;
732             /* flip to the new workspace */
733             FlipCondLikeSpace (m, chain, p->index);
734         }
735         
736         /* update conditional likelihoods */
737         operations.destinationPartials    = m->condLikeIndex[chain][p->index       ];
738         operations.child1Partials         = m->condLikeIndex[chain][p->left->index ];
739         operations.child1TransitionMatrix = m->tiProbsIndex [chain][p->left->index ];
740         operations.child2Partials         = m->condLikeIndex[chain][p->right->index];
741         operations.child2TransitionMatrix = m->tiProbsIndex [chain][p->right->index];
742         
743         /* All partials for tips are the same across omega catigoris, thus we are doing the following two if statments.*/
744         if (p->left->left== NULL)
745             chil1Step=0;
746         else
747             chil1Step=1;
748         
749         if (p->right->left== NULL)
750             chil2Step=0;
751         else
752             chil2Step=1;
753
754         operations.destinationScaleRead = BEAGLE_OP_NONE;
755         if (isScalerNode[p->index] == YES)
756             {
757             FlipNodeScalerSpace (m, chain, p->index);
758             operations.destinationScaleWrite = m->nodeScalerIndex[chain][p->index];
759             cumulativeScaleIndex  = m->siteScalerIndex[chain];
760             }
761         else
762             {
763             operations.destinationScaleWrite = BEAGLE_OP_NONE;
764             cumulativeScaleIndex  = BEAGLE_OP_NONE;
765             }
766         
767         
768         
769         for (j=0; j<m->nCijkParts; j++)
770         {
771             beagleUpdatePartials(m->beagleInstance,
772                                  &operations,
773                                  1,
774                                  cumulativeScaleIndex);
775             
776             operations.destinationPartials++;
777             operations.child1Partials+=chil1Step;
778             operations.child1TransitionMatrix++;
779             operations.child2Partials+=chil2Step;
780             operations.child2TransitionMatrix++;
781             
782             if (isScalerNode[p->index] == YES) {
783                 operations.destinationScaleWrite++;
784                 cumulativeScaleIndex++;
785                 }
786
787             }
788         }
789
790     return NO_ERROR;
791 }
792
793
794 /*----------------------------------------------------------------
795 |
796 |   TreeCondLikes_Beagle: This routine updates all conditional
797 |       (partial) likelihoods of a beagle instance.
798 |
799 -----------------------------------------------------------------*/
800 int TreeCondLikes_Beagle (Tree *t, int division, int chain)
801 {
802     int                 i, j, destinationScaleRead, cumulativeScaleIndex;
803     BeagleOperation     operations;
804     TreeNode            *p;
805     ModelInfo           *m;
806     unsigned            chil1Step, chil2Step;
807     
808     m = &modelSettings[division];
809     
810     for (i=0; i<t->nIntNodes; i++)
811         {
812         p = t->intDownPass[i];
813         
814         /* check if conditional likelihoods need updating */
815         if (p->upDateCl == YES)
816             {
817             /* remove old scalers */
818             if (m->scalersSet[chain][p->index] == YES && m->upDateAll == NO)
819                 {
820                 destinationScaleRead = m->nodeScalerIndex[chain][p->index];
821                 cumulativeScaleIndex = m->siteScalerIndex[chain];
822                 for (j=0; j<m->nCijkParts; j++)
823                     {
824                     beagleRemoveScaleFactors(m->beagleInstance,
825                                              &destinationScaleRead,
826                                              1,
827                                              cumulativeScaleIndex);
828                     destinationScaleRead++;
829                     cumulativeScaleIndex++;
830                     }
831                 }
832
833             /* flip to the new workspace */
834             FlipCondLikeSpace (m, chain, p->index);
835             FlipNodeScalerSpace (m, chain, p->index);
836             m->scalersSet[chain][p->index] = NO;
837             
838             /* update conditional likelihoods */
839             operations.destinationPartials    = m->condLikeIndex[chain][p->index       ];
840             operations.child1Partials         = m->condLikeIndex[chain][p->left->index ];
841             operations.child1TransitionMatrix = m->tiProbsIndex [chain][p->left->index ];
842             operations.child2Partials         = m->condLikeIndex[chain][p->right->index];
843             operations.child2TransitionMatrix = m->tiProbsIndex [chain][p->right->index];
844
845             /* All partials for tips are the same across omega catigoris, thus we are doing the following two if statments.*/
846             if (p->left->left== NULL && p->left->right== NULL)
847                 chil1Step=0;
848             else
849                 chil1Step=1;
850
851             if (p->right->left== NULL && p->right->right== NULL)
852                 chil2Step=0;
853             else
854                 chil2Step=1;
855
856             if (p->scalerNode == YES)
857                 {
858                 m->scalersSet[chain][p->index] = YES;
859                 operations.destinationScaleWrite = m->nodeScalerIndex[chain][p->index];
860                 cumulativeScaleIndex  = m->siteScalerIndex[chain];
861                 }
862             else
863                 {
864                 operations.destinationScaleWrite = BEAGLE_OP_NONE;
865                 cumulativeScaleIndex  = BEAGLE_OP_NONE;
866                 }
867             operations.destinationScaleRead = BEAGLE_OP_NONE;
868
869             for (j=0; j<m->nCijkParts; j++)
870                 {
871                 beagleUpdatePartials(m->beagleInstance,
872                                      &operations,
873                                      1,
874                                      cumulativeScaleIndex);
875
876                 operations.destinationPartials++;
877                 operations.child1Partials+=chil1Step;
878                 operations.child1TransitionMatrix++;
879                 operations.child2Partials+=chil2Step;
880                 operations.child2TransitionMatrix++;
881                 if (p->scalerNode == YES)
882                     {
883                     operations.destinationScaleWrite++;
884                     cumulativeScaleIndex++;
885                     }
886                 }
887
888             }
889         } /* end of for */
890
891     return NO_ERROR;
892 }
893
894
895 /**---------------------------------------------------------------------------
896 |
897 |   TreeLikelihood_Beagle: Accumulate the log likelihoods calculated by Beagle
898 |      at the root.
899 |
900 ---------------------------------------- -------------------------------------*/
901 int TreeLikelihood_Beagle (Tree *t, int division, int chain, MrBFlt *lnL, int whichSitePats)
902 {
903     int         i, j, c = 0, nStates, hasPInvar, beagleReturn;
904     MrBFlt      *swr, s01, s10, probOn, probOff, covBF[40], pInvar=0.0, *bs, freq, likeI, lnLikeI, diff, *omegaCatFreq;
905     CLFlt       *clInvar=NULL, *nSitesOfPat;
906     double      *nSitesOfPat_Beagle;
907     TreeNode    *p;
908     ModelInfo   *m;
909     double      pUnobserved;
910
911 #   if defined (MB_PRINT_DYNAMIC_RESCALE_FAIL_STAT)
912     static unsigned countBeagleDynamicFail=0;
913     static unsigned countALL=0;
914 #   endif
915
916     /* find root node */
917     p = t->root->left;
918
919     /* find model settings and nStates, pInvar, invar cond likes */
920     m = &modelSettings[division];
921     
922     nStates = m->numModelStates;
923     if (m->pInvar == NULL)
924         {
925         hasPInvar = NO;
926         }
927     else
928         {
929         hasPInvar = YES;
930         pInvar =  *(GetParamVals (m->pInvar, chain, state[chain]));
931         clInvar = m->invCondLikes;
932         }
933     
934     /* find base frequencies */
935     bs = GetParamSubVals (m->stateFreq, chain, state[chain]);
936     
937     /* if covarion model, adjust base frequencies */
938     if (m->switchRates != NULL)
939         {
940         /* find the stationary frequencies */
941         swr = GetParamVals(m->switchRates, chain, state[chain]);
942         s01 = swr[0];
943         s10 = swr[1];
944         probOn = s01 / (s01 + s10);
945         probOff =  1.0 - probOn;
946
947         /* now adjust the base frequencies; on-state stored first in cond likes */
948         for (j=0; j<nStates/2; j++)
949             {
950             covBF[j] = bs[j] * probOn;
951             covBF[j+nStates/2] = bs[j] * probOff;
952             }
953
954         /* finally set bs pointer to adjusted values */
955         bs = covBF;
956         }
957
958     /* TODO Really only need to check if state frequencies have changed */
959     if (m->upDateCijk == YES)
960         {
961         /* set base frequencies in BEAGLE instance */
962         for (i=0; i<m->nCijkParts; i++)
963             beagleSetStateFrequencies(m->beagleInstance,
964                                       m->cijkIndex[chain] + i,
965                                       bs);
966         }
967
968     /* find category frequencies */
969     if (hasPInvar == NO)
970         freq = 1.0 / m->numGammaCats;
971     else
972         freq = (1.0 - pInvar) / m->numGammaCats;
973
974     /* TODO: cat weights only need to be set when they change */
975     /* set category frequencies in beagle instance */
976     if (m->numOmegaCats > 1)
977         {
978         omegaCatFreq = GetParamSubVals(m->omega, chain, state[chain]);
979         for (i=0; i<m->nCijkParts; i++)
980             {
981             for (j=0; j<m->numGammaCats; j++)
982                 m->inWeights[j] = freq * omegaCatFreq[i];
983             beagleSetCategoryWeights(m->beagleInstance,
984                                      m->cijkIndex[chain] + i,
985                                      m->inWeights);
986             }
987         }
988     else if (hasPInvar == YES)
989         {
990         for (i=0; i<m->numGammaCats; i++)
991             m->inWeights[i] = freq;
992         beagleSetCategoryWeights(m->beagleInstance,
993                                  m->cijkIndex[chain],
994                                  m->inWeights);
995         }
996
997     /* find nSitesOfPat */
998     nSitesOfPat = numSitesOfPat + (whichSitePats*numCompressedChars) + m->compCharStart;
999     
1000     /* TODO: pattern weights only need to be set when they change */
1001     /* set pattern weights in beagle instance if using dynamic reweighting */
1002     if (chainParams.weightScheme[0] + chainParams.weightScheme[1] > ETA)
1003         {
1004         nSitesOfPat_Beagle = (double *) SafeMalloc (m->numChars * sizeof(double));
1005         for (c=0; c<m->numChars; c++)
1006             nSitesOfPat_Beagle[c] = numSitesOfPat[m->compCharStart + c];
1007         beagleSetPatternWeights(m->beagleInstance,
1008                                 nSitesOfPat_Beagle);
1009         SafeFree ((void **)(&nSitesOfPat_Beagle));
1010         }
1011
1012     /* find root log likelihoods and scalers */
1013     for (i=0; i<m->nCijkParts; i++)
1014         {
1015         m->bufferIndices[i] = m->condLikeIndex[chain][p->index] + i;
1016         m->eigenIndices[i]  = m->cijkIndex[chain] + i;
1017         m->cumulativeScaleIndices[i] = m->siteScalerIndex[chain] + i;
1018         if (t->isRooted == NO)
1019             {
1020             m->childBufferIndices[i]     = m->condLikeIndex  [chain][p->anc->index];
1021             m->childTiProbIndices[i]     = m->tiProbsIndex   [chain][p->index] + i;
1022             }
1023         }
1024
1025     /* reset lnL */
1026     *lnL = 0.0;
1027
1028     /* get root log likelihoods */
1029     if (t->isRooted == YES)
1030         {        
1031         beagleReturn = beagleCalculateRootLogLikelihoods(m->beagleInstance,
1032                                           m->bufferIndices,
1033                                           m->eigenIndices,
1034                                           m->eigenIndices,
1035                                           m->cumulativeScaleIndices,
1036                                           m->nCijkParts,
1037                                           lnL);
1038
1039         }
1040     else
1041         {
1042         beagleReturn = beagleCalculateEdgeLogLikelihoods(m->beagleInstance,
1043                                           m->bufferIndices,
1044                                           m->childBufferIndices,
1045                                           m->childTiProbIndices,
1046                                           NULL,
1047                                           NULL,
1048                                           m->eigenIndices,
1049                                           m->eigenIndices,
1050                                           m->cumulativeScaleIndices,
1051                                           m->nCijkParts,
1052                                           lnL,
1053                                           NULL,
1054                                           NULL);       
1055         }
1056 #   if defined (MB_PRINT_DYNAMIC_RESCALE_FAIL_STAT)
1057     countALL++;
1058 #   endif
1059     if (beagleReturn == BEAGLE_ERROR_FLOATING_POINT)
1060         {
1061 #   if defined (MB_PRINT_DYNAMIC_RESCALE_FAIL_STAT)
1062         countBeagleDynamicFail++;
1063         MrBayesPrint ("DEBUG INFO (not an error) countBeagleDynamicFail:%d countALL:%d\n", countBeagleDynamicFail, countALL);
1064 #   endif
1065         return beagleReturn;
1066         }
1067     assert (beagleReturn == BEAGLE_SUCCESS);
1068     m->succesCount[chain]++;
1069     
1070     /* accumulate logs across sites */
1071     if (hasPInvar == NO)
1072         {
1073         if (m->dataType == RESTRICTION)
1074             {
1075             beagleGetSiteLogLikelihoods(m->beagleInstance, m->logLikelihoods);
1076             (*lnL) = 0.0;
1077             pUnobserved = 0.0;
1078             for (c=0; c<m->numDummyChars; c++)
1079                 {
1080                 pUnobserved +=  exp((double)m->logLikelihoods[c]);
1081                 }
1082             /* correct for absent characters */
1083             (*lnL) -= log (1-pUnobserved) * (m->numUncompressedChars);
1084             for (; c<m->numChars; c++)
1085                 {
1086                 (*lnL) += m->logLikelihoods[c] * nSitesOfPat[c];
1087                 }
1088             }
1089         /* already done, just check for numerical errors */
1090         assert ((*lnL) == (*lnL));
1091         }
1092     else
1093         {
1094         /* has invariable category */
1095         beagleGetSiteLogLikelihoods(m->beagleInstance,
1096                                     m->logLikelihoods);
1097         (*lnL) = 0.0;
1098         for (c=0; c<m->numChars; c++)
1099             {
1100             likeI = 0.0;
1101             for (j=0; j<nStates; j++)
1102                 likeI += (*(clInvar++)) * bs[j];
1103             if (likeI != 0.0)
1104                 {
1105                 lnLikeI = log(likeI * pInvar);
1106                 diff = lnLikeI - m->logLikelihoods[c];
1107                 }
1108             else
1109                 diff = -1000.0;
1110             if (diff < -200.0)
1111                 (*lnL) += m->logLikelihoods[c] * nSitesOfPat[c];
1112             else if (diff > 200.0)
1113                 (*lnL) += lnLikeI * nSitesOfPat[c];
1114             else
1115                 {
1116                 (*lnL) += (m->logLikelihoods[c] + log(1.0 + exp(diff))) * nSitesOfPat[c];
1117                 }
1118
1119             /* check for numerical errors */
1120             assert ((*lnL) == (*lnL));
1121             }       
1122         }
1123         
1124     return (NO_ERROR);
1125 }
1126
1127
1128 /*----------------------------------------------------------------
1129 |
1130 |   TreeTiProbs_Beagle: This routine updates all transition
1131 |       probability matrices of a beagle instance.
1132 |
1133 -----------------------------------------------------------------*/
1134 int TreeTiProbs_Beagle (Tree *t, int division, int chain)
1135 {
1136     int         i, j, k, count;
1137     MrBFlt      correctionFactor, theRate, baseRate, *catRate, length;
1138     TreeNode    *p;
1139     ModelInfo   *m;
1140     
1141     /* get model settings */
1142     m = &modelSettings[division];
1143     
1144     /* find the correction factor to make branch lengths
1145        in terms of expected number of substitutions per character */
1146     correctionFactor = 1.0;
1147     if (m->dataType == DNA || m->dataType == RNA)
1148         {
1149         if (m->nucModelId == NUCMODEL_DOUBLET)
1150             correctionFactor = 2.0;
1151         else if (m->nucModelId == NUCMODEL_CODON)
1152             correctionFactor = 3.0;
1153         }
1154
1155     /* get rate multipliers (for gamma & partition specific rates) */
1156     theRate = 1.0;
1157     baseRate = GetRate (division, chain);
1158     
1159     /* compensate for invariable sites if appropriate */
1160     if (m->pInvar != NULL)
1161         baseRate /= (1.0 - (*GetParamVals(m->pInvar, chain, state[chain])));
1162         
1163     /* get category rates for gamma */
1164     if (m->shape == NULL)
1165         catRate = &theRate;
1166     else
1167         catRate = GetParamSubVals (m->shape, chain, state[chain]);
1168     
1169     /* get effective category rates */
1170     for (k=0; k<m->numGammaCats; k++)
1171         m->inRates[k] = baseRate * catRate[k] * correctionFactor;
1172
1173     /* TODO: only need to set category rates when they change */
1174     /* set category rates */
1175     beagleSetCategoryRates(m->beagleInstance, m->inRates);
1176     
1177     /* get ti prob indices and branch lengths to update */
1178     for (i=count=0; i<t->nNodes; i++)
1179         {
1180         p = t->allDownPass[i];
1181         
1182         /* check if transition probs need updating */
1183         if (p->upDateTi == YES)
1184             {
1185             /* flip transition probability */
1186             FlipTiProbsSpace (m, chain, p->index);
1187             
1188             /* find length */
1189             if (m->cppEvents != NULL)
1190                 {
1191                 length = GetParamSubVals (m->cppEvents, chain, state[chain])[p->index];
1192                 }
1193             else if (m->tk02BranchRates != NULL)
1194                 {
1195                 length = GetParamSubVals (m->tk02BranchRates, chain, state[chain])[p->index];
1196                 }
1197             else if (m->igrBranchRates != NULL)
1198                 {
1199                 length = GetParamSubVals (m->igrBranchRates, chain, state[chain])[p->index];
1200                 }
1201             else if (m->mixedBrchRates != NULL)
1202                 {
1203                 length = GetParamSubVals (m->mixedBrchRates, chain, state[chain])[p->index];
1204                 }
1205             else
1206                 length = p->length;
1207
1208             /* numerical errors might ensue if we allow very large or very small branch lengths, which might
1209                occur in relaxed clock models; an elegant solution would be to substitute the stationary
1210                probs and initial probs but for now we truncate lengths at small or large values */
1211             if (length > BRLENS_MAX)
1212                 length = BRLENS_MAX;
1213             else if (length < BRLENS_MIN)
1214                 length = BRLENS_MIN;
1215
1216             m->branchLengths[count] = length;
1217             
1218             /* find index */
1219             m->tiProbIndices[count] = m->tiProbsIndex[chain][p->index];
1220             count++;
1221             }
1222         }
1223
1224     /* TODO: only need to update branches that have changed */
1225     /* calculate transition probabilities */
1226     if (count > 0) {
1227         for (i=0; i<m->nCijkParts; i++)
1228             {
1229             beagleUpdateTransitionMatrices(m->beagleInstance,
1230                                            m->cijkIndex[chain] + i,
1231                                            m->tiProbIndices,
1232                                            NULL,
1233                                            NULL,
1234                                            m->branchLengths,
1235                                            count);
1236             for (j=0; j<count; j++)
1237                 m->tiProbIndices[j]++;
1238             }
1239         }
1240
1241     /* return success */
1242     return NO_ERROR;
1243 }
1244
1245 #endif // BEAGLE_ENABLED
1246
1247
1248 void BeagleNotLinked()
1249 {
1250     MrBayesPrint ("%s   BEAGLE library is not linked to this executable.\n", spacer);
1251 }
1252
1253
1254 void BeagleThreadsNotLinked()
1255 {
1256     MrBayesPrint ("%s   Pthreads library is not linked to this executable.\n", spacer);
1257 }
1258