src/mlphylo.c

   1 /* mlphylo.c       2008-06-18 */
   2
   3 /* Copyright 2006-2008 Emmanuel Paradis */
   4
   5 /* This file is part of the R-package `ape'. */
   6 /* See the file ../COPYING for licensing issues. */
   7
   8 #include <R.h>
   9 #include <Rmath.h>
  10 #include <R_ext/Applic.h>
  11 #include <R_ext/Lapack.h>
  12
  13 typedef struct {
  14         int *n;
  15         int *s;
  16         double *w;
  17         unsigned char *seq;
  18         double *anc;
  19 } dna_matrix;
  20
  21 typedef struct {
  22         int *edge1;
  23         int *edge2;
  24         double *el;
  25 } phylo;
  26
  27 typedef struct {
  28         int *npart;
  29         int *partition;
  30         int *model;
  31         double *xi;
  32         double *para;
  33         int *npara;
  34         double *alpha;
  35         int *nalpha;
  36         int *ncat;
  37         double *invar;
  38         int *ninvar;
  39 } DNAmodel;
  40
  41 typedef struct {
  42         dna_matrix X;
  43         phylo PHY;
  44         DNAmodel MOD;
  45         double *BF;
  46 } DNAdata;
  47
  48 typedef struct {
  49         DNAdata *D; int i;
  50 } info;
  51
  52
  53 void tQ_unbalBF(double *BF, double *P)
  54 /* This function computes the rate matrix Q multiplied by
  55    time t in the case of unbalanced base frequencies.
  56    The arguments are:
  57   BF: the base frequencies
  58    P: (input) the matrix of substitution rates
  59       (output) tQ
  60    NOTE: P must already be multiplied by t */
  61 {
  62    P[1] *= BF[0];  P[2] *= BF[0];  P[3] *= BF[0];
  63    P[4] *= BF[1];  P[6] *= BF[1];  P[7] *= BF[1];
  64    P[8] *= BF[2];  P[9] *= BF[2]; P[11] *= BF[2];
  65   P[12] *= BF[3]; P[13] *= BF[3]; P[14] *= BF[3];
  66
  67    P[0] = -P[4] - P[8] - P[12];
  68    P[5] = -P[1] - P[9] - P[13];
  69   P[10] = -P[2] - P[6] - P[14];
  70   P[15] = -P[3] - P[7] - P[11];
  71 }
  72
  73 void mat_expo4x4(double *P)
  74 /* This function computes the exponential of a 4x4 matrix */
  75 {
  76   double U[16], vl[4], WR[4], Uinv[16], WI[4], work[32];
  77   int i, info, ipiv[16], n = 4, lw = 32, ord[4];
  78   char yes = 'V', no = 'N';
  79
  80   /* The matrix is not symmetric, so we use 'dgeev'. */
  81   /* We take the real part of the eigenvalues -> WR */
  82   /* and the right eigenvectors (vr) -> U */
  83   F77_CALL(dgeev)(&no, &yes, &n, P, &n, WR, WI, vl, &n,
  84                   U, &n, work, &lw, &info);
  85
  86   /* It is not necessary to sort the eigenvalues... */
  87   /* Copy U -> P */
  88   for (i = 0; i < 16; i++) P[i] = U[i];
  89
  90   /* For the inversion, we first make Uinv an identity matrix */
  91   for (i = 1; i < 15; i++) Uinv[i] = 0;
  92   Uinv[0] = Uinv[5] = Uinv[10] = Uinv[15] = 1;
  93
  94   /* The matrix is not symmetric, so we use 'dgesv'. */
  95   /* This subroutine puts the result in Uinv (B) */
  96   /* (P [= U] is erased) */
  97   F77_CALL(dgesv)(&n, &n, P, &n, ipiv, Uinv, &n, &info);
  98
  99   /* The matrix product of U with the eigenvalues diagonal matrix: */
 100   for (i = 0; i < 4; i++) U[i] *= exp(WR[0]);
 101   for (i = 4; i < 8; i++) U[i] *= exp(WR[1]);
 102   for (i = 8; i < 12; i++) U[i] *= exp(WR[2]);
 103   for (i = 12; i < 16; i++) U[i] *= exp(WR[3]);
 104
 105   /* The second matrix product with U^-1 */
 106   P[1] = U[1]*Uinv[0] + U[5]*Uinv[1] + U[9]*Uinv[2] + U[13]*Uinv[3];
 107   P[2] = U[2]*Uinv[0] + U[6]*Uinv[1] + U[10]*Uinv[2] + U[14]*Uinv[3];
 108   P[3] = U[3]*Uinv[0] + U[7]*Uinv[1] + U[11]*Uinv[2] + U[15]*Uinv[3];
 109   P[4] = U[0]*Uinv[4] + U[4]*Uinv[5] + U[8]*Uinv[6] + U[12]*Uinv[7];
 110   P[6] = U[2]*Uinv[4] + U[6]*Uinv[5] + U[10]*Uinv[6] + U[14]*Uinv[7];
 111   P[7] = U[3]*Uinv[4] + U[7]*Uinv[5] + U[11]*Uinv[6] + U[15]*Uinv[7];
 112   P[8] = U[0]*Uinv[8] + U[4]*Uinv[9] + U[8]*Uinv[10] + U[12]*Uinv[11];
 113   P[9] = U[1]*Uinv[8] + U[5]*Uinv[9] + U[9]*Uinv[10] + U[13]*Uinv[11];
 114   P[11] = U[3]*Uinv[8] + U[7]*Uinv[9] + U[11]*Uinv[10] + U[15]*Uinv[11];
 115   P[12] = U[0]*Uinv[12] + U[4]*Uinv[13] + U[8]*Uinv[14] + U[12]*Uinv[15];
 116   P[13] = U[1]*Uinv[12] + U[5]*Uinv[13] + U[9]*Uinv[14] + U[13]*Uinv[15];
 117   P[14] = U[2]*Uinv[12] + U[6]*Uinv[13] + U[10]*Uinv[14] + U[14]*Uinv[15];
 118   P[0] = 1 - P[4] - P[8] - P[12];
 119   P[5] = 1 - P[1] - P[9] - P[13];
 120   P[10] = 1 - P[2] - P[6] - P[14];
 121   P[15] = 1 - P[3] - P[7] - P[11];
 122 }
 123
 124 void PMAT_JC69(double t, double u, double *P)
 125 {
 126   P[1]=P[2]=P[3]=P[4]=P[6]=P[7]=P[8]=P[9]=P[11]=P[12]=P[13]=P[14]=(1 - exp(-4*u*t))/4;
 127   P[0] = P[5] = P[10] = P[15] = 1 - 3*P[1];
 128 }
 129
 130 void PMAT_K80(double t, double b, double a, double *P)
 131 {
 132   double R, p;
 133
 134   R = a/(2*b);
 135   p = exp(-2*t/(R + 1));
 136
 137   P[1] = 0.5*(1 - p); /* A -> C */
 138   P[2] = 0.25 - 0.5*exp(-t*(2*R + 1)/(R + 1)) + 0.25*p; /* A -> G */
 139   P[0] = P[5] = P[10] = P[15] = 1 - 2*P[1] - P[2];
 140   P[3] = P[4] = P[6] = P[11] = P[9] = P[12] = P[14] = P[1];
 141   P[7] = P[8] = P[13] = P[2];
 142 }
 143
 144 void PMAT_F81(double t, double u, double *BF, double *P)
 145 {
 146   double p;
 147   p = exp(-t*u);
 148
 149   P[0] = p + (1 - p) * BF[0]; /* A->A */
 150   P[1] = P[9] = P[13] = (1 - p)*BF[1]; /* A->C, G->C, T->C */
 151   P[2] = P[6] = P[14] = (1 - p)*BF[2]; /* A->G, C->G, T->G */
 152   P[3] = P[7] = P[11] = (1 - p)*BF[3]; /* A->T, C->T, G->T */
 153   P[4] = P[8] = P[12] = (1 - p)*BF[0]; /* C->A, G->A, T->A */
 154   P[5] = p + P[1]; /* C->C */
 155   P[10] = p + P[2]; /* G->G */
 156   P[15] = p + P[3]; /* T->T */
 157 }
 158
 159 void PMAT_F84(double t, double a, double b, double *BF, double *P)
 160 {
 161   double pI, pII, B, x, y;
 162
 163   B = exp(-b*t);
 164   pI = B * (1 - exp(-a*t)); /* probability of at least one event of type I */
 165   pII = 1 - B; /* probability of at least one event of type II */
 166   x = pI * (BF[0] + BF[2]);
 167   y = pI * (BF[1] + BF[3]);
 168
 169   P[12] = P[4] = pII * BF[0];   /* C->A, T->A */
 170   P[14] = P[6] = pII * BF[2];   /* C->G, T->G */
 171   P[9] = P[1] = pII * BF[1];   /* A->C, G->C */
 172   P[2] = x + P[6];      /* A->G */
 173   P[11] = P[3] = pII * BF[3];   /* A->T, G->T*/
 174   P[0] = 1 - P[1] - P[2] - P[3];  /* A->A */
 175   P[7] = y +  P[3];     /* C->T */
 176   P[5] = 1 - P[4] - P[6] - P[7];  /* C->C */
 177   P[8] = x + P[4];      /* G->A */
 178   P[10] = 1 - P[8] - P[9] - P[11]; /* G->G */
 179   P[13] = y + P[1];     /* T->C */
 180   P[15] = 1 - P[12] - P[13] - P[14]; /* T->T */
 181 }
 182
 183 void PMAT_HKY85(double t, double a, double b, double *BF, double *P)
 184 {
 185   P[2] = P[7] = P[8] = P[13] = t*a;
 186   P[1] = P[3] = P[4] = P[6] = P[9] = P[11] = P[12] = P[14] = t*b;
 187   tQ_unbalBF(BF, P);
 188   mat_expo4x4(P);
 189 }
 190
 191 void PMAT_T92(double t, double a, double b, double *BF, double *P)
 192 {
 193   double theta, A, B1, B2, C, x, y;
 194
 195   theta = BF[1] + BF[2];
 196   A = (1 - theta)/2;
 197   B1 = (1 + exp(-t));
 198   B2 = (1 - exp(-t));
 199   C = exp(-t * ((a/b + 1)/2));
 200   x = 0.5 * theta * B1;
 201   y = (1 - theta) * C;
 202
 203   P[1] = P[6] = P[9] = P[14] = 0.5 * theta * B2; /* A->C, C->G, T->G, G->C */
 204   P[2] = P[13] = x - theta * C; /* A->G, T->C */
 205   P[3] = P[4] = P[11] = P[12] = A * B2; /* A->T, C->A, G->T, T->A */
 206   P[0] = P[15] = 1 - P[1] - P[2] - P[3]; /* A->A, T->T */
 207   P[5] = P[10] = x + y; /* C->C, G->G */
 208   P[7] = P[8] = A * B1 - y; /* C->T, G->A */
 209 }
 210
 211 void PMAT_TN93(double t, double a1, double a2, double b,
 212                double *BF, double *P)
 213 {
 214
 215   double A1, A2, B;
 216
 217   A1 = (1 - exp(-a1*t)); /* transitions between purines (A <-> G) */
 218   A2 = (1 - exp(-a2*t)); /* transitions between pyrimidines (C <-> T) */
 219   B = (1 - exp(-b*t));
 220
 221   P[1] = B * BF[1];                  /* A->C */
 222   P[2] = A1 * BF[2];                 /* A->G */
 223   P[3] = B * BF[3];                  /* A->T */
 224   P[0] = 1 - P[1] - P[2] - P[3];     /* A->A */
 225   P[4] = B * BF[0];                  /* C->A */
 226   P[6] = B * BF[2];                  /* C->G */
 227   P[7] = A2 * BF[3] ;                /* C->T */
 228   P[5] = 1 - P[4] - P[6] - P[7];     /* C->C */
 229   P[8] = A1 * BF[0];                 /* G->A */
 230   P[9] = B * BF[1];                  /* G->C */
 231   P[11] = B * BF[3];                 /* G->T */
 232   P[10] = 1 - P[8] - P[9] - P[11];   /* G->G */
 233   P[12] = B * BF[0];                 /* T->A */
 234   P[13] = A2 * BF[1];                /* T->C */
 235   P[14] = B * BF[2];                 /* T->G */
 236   P[15] = 1 - P[12] - P[13] - P[14]; /* T->T */
 237 }
 238
 239 void PMAT_GTR(double t, double a, double b, double c, double d, double e,
 240               double f, double *BF, double *P)
 241 {
 242   P[1] = P[4] = t*a;
 243   P[2] = P[8] = t*b;
 244   P[3] = P[12] = t*c;
 245   P[6] = P[9] = t*d;
 246   P[7] = P[13] = t*e;
 247   P[11] = P[14] = t*f;
 248   tQ_unbalBF(BF, P);
 249   mat_expo4x4(P);
 250 }
 251
 252 #define GET_DNA_PARAMETERS \
 253     /* get the substitution parameters */ \
 254     model = *(D->MOD.model); \
 255     /* If the model is not JC69 or F81: */ \
 256     if (model != 1 && model != 3) { \
 257         for(i = 0; i < *(D->MOD.npara); i++) \
 258             u[i] = D->MOD.para[i]; \
 259     } \
 260     /* get the shape parameter and calculate the coefficients */ \
 261     ncat = *(D->MOD.ncat); \
 262     if (ncat > 1) { \
 263         /* use `tmp[0]' to store the mean of the coefficients */ \
 264         /* in order to rescale them */ \
 265         tmp[0] = 0.; \
 266         if (*(D->MOD.nalpha) > 1) alpha = *(D->MOD.alpha); \
 267         else alpha = D->MOD.alpha[k]; \
 268         for (j = 0; j < ncat; j++) { \
 269                 coef_gamma[j] = qgamma((0.5 + j)/ncat, alpha, \
 270                                        1/alpha, 1, 0); \
 271                 tmp[0] += coef_gamma[j]; \
 272         } \
 273         tmp[0] /= ncat; \
 274         for (j = 0; j < ncat; j++) \
 275           coef_gamma[j] /= tmp[0]; \
 276     } else coef_gamma[0] = 1.; \
 277     /* get the proportion of invariants */ \
 278     if (*(D->MOD.ninvar)) { \
 279         if (*(D->MOD.ninvar) > 1) I = *(D->MOD.invar); \
 280         else I = D->MOD.invar[k]; \
 281     } else I = 0.; \
 282
 283 void getSiteLik(int n, int d, int j, int nr, DNAdata *D, double *L)
 284 {
 285         int i;
 286
 287         if (d <= n - 1) {
 288                 i = d + j*n;
 289                 memset(L, 0, 4*sizeof(double));
 290                 if (D->X.seq[i] & 128) L[0] = 1; /* if A or else */
 291                 if (D->X.seq[i] & 32) L[1] = 1; /* if C or else */
 292                 if (D->X.seq[i] & 64) L[2] = 1; /* if G or else */
 293                 if (D->X.seq[i] & 16) L[3] = 1; /* if T or else */
 294         } else {
 295                 i = (d - n) + j*(n - 2);
 296                 L[0] = D->X.anc[i];
 297                 L[1] = D->X.anc[i + nr];
 298                 L[2] = D->X.anc[i + 2*nr];
 299                 L[3] = D->X.anc[i + 3*nr];
 300         }
 301 }
 302
 303 #define LOOP_THROUGH_SITES \
 304     for(j = start; j <= end; j++) { \
 305         memset(tmp, 0, 4*sizeof(double)); \
 306         getSiteLik(n, d1, j, nr, D, L1); \
 307         getSiteLik(n, d2, j, nr, D, L2); \
 308         for(i = 0; i < ncat; i++) { \
 309             switch(model) { \
 310             case 1 : PMAT_JC69(l1, coef_gamma[i], P1); \
 311                      PMAT_JC69(l2, coef_gamma[i], P2); break; \
 312             case 2 : PMAT_K80(l1, coef_gamma[i], u[0], P1); \
 313                      PMAT_K80(l2, coef_gamma[i], u[0], P2); break; \
 314             case 3 : PMAT_F81(l1, coef_gamma[i], BF, P1); \
 315                      PMAT_F81(l2, coef_gamma[i], BF, P2); break; \
 316             case 4 : PMAT_F84(l1, coef_gamma[i], u[0], BF, P1); \
 317                      PMAT_F84(l2, coef_gamma[i], u[0], BF, P2); break; \
 318             case 5 : PMAT_HKY85(l1, coef_gamma[i], u[0], BF, P1); \
 319                      PMAT_HKY85(l2, coef_gamma[i], u[0], BF, P2); break; \
 320             case 6 : PMAT_T92(l1, coef_gamma[i], u[0], BF, P1); \
 321                      PMAT_T92(l2, coef_gamma[i], u[0], BF, P2); break; \
 322             case 7 : PMAT_TN93(l1, coef_gamma[i], u[0], u[1], BF, P1); \
 323                      PMAT_TN93(l2, coef_gamma[i], u[0], u[1], BF, P2); break; \
 324             case 8 : PMAT_GTR(l1, coef_gamma[i], u[0], u[1], u[2], u[3], u[4], BF, P1); \
 325                      PMAT_GTR(l2, coef_gamma[i], u[0], u[1], u[2], u[3], u[4], BF, P2); break; \
 326             } \
 327             tmp[0] += (L1[0]*P1[0] + L1[1]*P1[1] + L1[2]*P1[2] + L1[3]*P1[3]) * \
 328                       (L2[0]*P2[0] + L2[1]*P2[1] + L2[2]*P2[2] + L2[3]*P2[3]); \
 329             tmp[1] += (L1[0]*P1[4] + L1[1]*P1[5] + L1[2]*P1[6] + L1[3]*P1[7]) * \
 330                       (L2[0]*P2[4] + L2[1]*P2[5] + L2[2]*P2[6] + L2[3]*P2[7]); \
 331             tmp[2] += (L1[0]*P1[8] + L1[1]*P1[9] + L1[2]*P1[10] + L1[3]*P1[11]) * \
 332                       (L2[0]*P2[8] + L2[1]*P2[9] + L2[2]*P2[10] + L2[3]*P2[11]); \
 333             tmp[3] += (L1[0]*P1[12] + L1[1]*P1[13] + L1[2]*P1[14] + L1[3]*P1[15]) * \
 334                       (L2[0]*P2[12] + L2[1]*P2[13] + L2[2]*P2[14] + L2[3]*P2[15]); \
 335         } \
 336         if (ncat > 1) { \
 337             tmp[0] /= ncat; \
 338             tmp[1] /= ncat; \
 339             tmp[2] /= ncat; \
 340             tmp[3] /= ncat; \
 341         } \
 342         if (D->MOD.ninvar) { \
 343             V = 1. - I; \
 344             tmp[0] = V*tmp[0] + I*L1[0]*L2[0]; \
 345             tmp[1] = V*tmp[1] + I*L1[1]*L2[1]; \
 346             tmp[2] = V*tmp[2] + I*L1[2]*L2[2]; \
 347             tmp[3] = V*tmp[3] + I*L1[3]*L2[3]; \
 348         } \
 349         ind = anc - n + j*(n - 2); \
 350         D->X.anc[ind] = tmp[0]; \
 351         D->X.anc[ind + nr] = tmp[1]; \
 352         D->X.anc[ind + 2*nr] = tmp[2]; \
 353         D->X.anc[ind + 3*nr] = tmp[3]; \
 354     }
 355
 356 void lik_dna_node(DNAdata *D, int ie)
 357 /*
 358 This function computes the likelihoods at a node for all
 359 nucleotides.
 360 */
 361 {
 362         int d1, d2, anc, n, nr;
 363         int i, j, k, start, end, ind, i1, i2, ncat, model;
 364         double tmp[4], L1[4], L2[4], l1, l2, P1[16], P2[16], V, coef_gamma[10], u[6], I, alpha, *BF;
 365
 366         n = *(D->X.n);
 367         nr = *(D->X.s) * (n - 2);
 368         BF = D->BF;
 369
 370         d1 = D->PHY.edge2[ie];
 371         d2 = D->PHY.edge2[ie + 1];
 372         anc = D->PHY.edge1[ie];
 373
 374         /* change these to use them as indices */
 375         d1--; d2--; anc--;
 376
 377         l1 = D->PHY.el[ie];
 378         l2 = D->PHY.el[ie + 1];
 379
 380         for(k = 0; k < *(D->MOD.npart); k++) {
 381                 start = D->MOD.partition[k*2] - 1;
 382                 end = D->MOD.partition[k*2 + 1] - 1;
 383
 384                 GET_DNA_PARAMETERS
 385
 386                 if (k > 0) {
 387                         l1 *= D->MOD.xi[k - 1];
 388                         l2 *= D->MOD.xi[k - 1];
 389                 }
 390
 391                 LOOP_THROUGH_SITES
 392         }
 393 } /* lik_dna_node */
 394
 395 void lik_dna_root(DNAdata *D)
 396 /*
 397 This function computes the likelihoods at the root for all
 398 nucleotides.
 399 */
 400 {
 401         int i, j, k, start, end, ind, ncat, model, d1, d2, n, N, nr;
 402         double tmp[4], L1[4], L2[4], el, P[16], V, coef_gamma[10], u[6], I, alpha, *BF;
 403
 404         n = *(D->X.n); /* number of tips */
 405         N = 2*n - 3; /* number of edges */
 406         nr = *(D->X.s) * (n - 2);
 407         BF = D->BF;
 408
 409         d1 = D->PHY.edge1[N - 1]; /* it's the root */
 410         d2 = D->PHY.edge2[N - 1];
 411
 412         /* change these to use them as indices */
 413         d1--; d2--;
 414
 415         el = D->PHY.el[N - 1];
 416
 417         for(k = 0; k < *(D->MOD.npart); k++) {
 418                 start = D->MOD.partition[k*2] - 1;
 419                 end = D->MOD.partition[k*2 + 1] - 1;
 420
 421                 GET_DNA_PARAMETERS
 422
 423                 if (k > 0) el *= D->MOD.xi[k - 1];
 424
 425                 for(j = start; j <= end; j++) {
 426                         getSiteLik(n, d1, j, nr, D, L1);
 427                         getSiteLik(n, d2, j, nr, D, L2);
 428                         memset(tmp, 0, 4*sizeof(double));
 429                         for(i = 0; i < ncat; i++) {
 430                                 switch(model) {
 431                                 case 1 : PMAT_JC69(el, coef_gamma[i], P); break;
 432                                 case 2 : PMAT_K80(el, coef_gamma[i], u[0], P); break;
 433                                 case 3 : PMAT_F81(el, coef_gamma[i], BF, P); break;
 434                                 case 4 : PMAT_F84(el, coef_gamma[i], u[0], BF, P); break;
 435                                 case 5 : PMAT_HKY85(el, coef_gamma[i], u[0], BF, P); break;
 436                                 case 6 : PMAT_T92(el, coef_gamma[i], u[0], BF, P); break;
 437                                 case 7 : PMAT_TN93(el, coef_gamma[i], u[0], u[1], BF, P); break;
 438                                 case 8 : PMAT_GTR(el, coef_gamma[i], u[0], u[1], u[2], u[3], u[4], BF, P); break;
 439                                 }
 440                                 tmp[0] += (L2[0]*P[0] + L2[1]*P[1] + L2[2]*P[2] + L2[3]*P[3]);
 441                                 tmp[1] += (L2[0]*P[4] + L2[1]*P[5] + L2[2]*P[6] + L2[3]*P[7]);
 442                                 tmp[2] += (L2[0]*P[8] + L2[1]*P[9] + L2[2]*P[10] + L2[3]*P[11]);
 443                                 tmp[3] += (L2[0]*P[12] + L2[1]*P[13] + L2[2]*P[14] + L2[3]*P[15]);
 444                         }
 445                         if (ncat > 1) {
 446                                 tmp[0] /= ncat;
 447                                 tmp[1] /= ncat;
 448                                 tmp[2] /= ncat;
 449                                 tmp[3] /= ncat;
 450                         }
 451                         if (*(D->MOD.ninvar)) {
 452                                 V = 1. - I;
 453                                 tmp[0] = V*tmp[0] + I*L2[0]*L1[0];
 454                                 tmp[1] = V*tmp[1] + I*L2[1]*L1[1];
 455                                 tmp[2] = V*tmp[2] + I*L2[2]*L1[2];
 456                                 tmp[3] = V*tmp[3] + I*L2[3]*L1[3];
 457                         }
 458                         ind = j*(n - 2);
 459                         D->X.anc[ind] = tmp[0]*L1[0];
 460                         D->X.anc[ind + nr] = tmp[1]*L1[1];
 461                         D->X.anc[ind + 2*nr] = tmp[2]*L1[2];
 462                         D->X.anc[ind + 3*nr] = tmp[3]*L1[3];
 463                 }
 464         }
 465 } /* lik_dna_root */
 466
 467 void lik_dna_tree(DNAdata *D, double *loglik)
 468 {
 469     int i, j, n, nnode, nsite, nr;
 470     double tmp;
 471
 472     n = *(D->X.n);
 473     nnode = n - 2;
 474     nsite = *(D->X.s);
 475     nr = nsite*nnode;
 476
 477     /* loop through the tree
 478        We don't do the root node here, so i goes between 0 and 2n - 5 */
 479     for(i = 0; i < 2*n - 4; i += 2) lik_dna_node(D, i);
 480
 481     /* We now do the root */
 482     lik_dna_root(D);
 483     *loglik = 0.;
 484     for(j = 0; j < nsite; j++) {
 485             tmp = 0.;
 486             for (i = 0; i < 4; i++)
 487                     tmp += D->BF[i] * D->X.anc[j*nnode + i*nr];
 488             *loglik += D->X.w[j]*log(tmp);
 489     }
 490 } /* lik_dna_tree */
 491
 492 double fcn_mlphylo_invar(double I, info *INFO)
 493 {
 494     double loglik;
 495
 496     INFO->D->MOD.invar[INFO->i] = I;
 497     lik_dna_tree(INFO->D, &loglik);
 498
 499     return -loglik;
 500 }
 501
 502 void mlphylo_invar(int N, DNAdata *D, double *loglik)
 503 /*
 504 optimize proportion of invariants
 505 */
 506 {
 507     int i;
 508     info INFO, *infptr;
 509     double I;
 510
 511     infptr = &INFO;
 512     INFO.D = D;
 513
 514     for(i = 0; i < N; i++) {
 515         infptr->i = i;
 516         I = Brent_fmin(0.0, 1.0,
 517                        (double (*)(double, void*)) fcn_mlphylo_invar,
 518                        infptr, 1.e-9);
 519         D->MOD.invar[i] = I;
 520     }
 521 }
 522
 523 double fcn_mlphylo_gamma(double a, info *INFO)
 524 {
 525     double loglik;
 526
 527     INFO->D->MOD.alpha[INFO->i] = a;
 528     lik_dna_tree(INFO->D, &loglik);
 529
 530     return -loglik;
 531 }
 532
 533 void mlphylo_gamma(int N, DNAdata *D, double *loglik)
 534 /*
 535 optimize gamma (ISV) parameters
 536 */
 537 {
 538     int i;
 539     info INFO, *infptr;
 540     double a;
 541
 542     infptr = &INFO;
 543     INFO.D = D;
 544
 545     for(i = 0; i < N; i++) {
 546         infptr->i = i;
 547         a = Brent_fmin(0.0, 1.e4,
 548                        (double (*)(double, void*)) fcn_mlphylo_gamma,
 549                        infptr, 1.e-6);
 550         D->MOD.alpha[i] = a;
 551     }
 552 }
 553
 554 double fcn_mlphylo_para(double p, info *INFO)
 555 {
 556     double loglik;
 557
 558     INFO->D->MOD.para[INFO->i] = p;
 559     lik_dna_tree(INFO->D, &loglik);
 560
 561     return -loglik;
 562 }
 563
 564 void mlphylo_para(int N, DNAdata *D, double *loglik)
 565 /*
 566 optimize the contrast parameter(s) xi
 567 */
 568 {
 569     int i;
 570     info INFO, *infptr;
 571     double p;
 572
 573     infptr = &INFO;
 574     INFO.D = D;
 575
 576     for(i = 0; i < N; i++) {
 577         infptr->i = i;
 578         p = Brent_fmin(0, 1.e3,
 579                        (double (*)(double, void*)) fcn_mlphylo_para,
 580                        infptr, 1.e-6);
 581         D->MOD.para[i] = p;
 582     }
 583 }
 584
 585 double fcn_mlphylo_xi(double XI, info *INFO)
 586 {
 587     double loglik;
 588
 589     INFO->D->MOD.xi[INFO->i] = XI;
 590     lik_dna_tree(INFO->D, &loglik);
 591
 592     return -loglik;
 593 }
 594
 595 void mlphylo_xi(int N, DNAdata *D, double *loglik)
 596 /*
 597 optimize the contrast parameter(s) xi
 598 */
 599 {
 600     int i;
 601     info INFO, *infptr;
 602     double XI;
 603
 604     infptr = &INFO;
 605     INFO.D = D;
 606
 607     /* In the following, the range of the search algo was changed from */
 608     /* 0-1000 to 0-100 to avoid infinite looping. (2006-04-15) */
 609     /* This was changed again to 0-20. (2006-07-17) */
 610
 611     for(i = 0; i < N; i++) {
 612         infptr->i = i;
 613         XI = Brent_fmin(0.0, 2.e1,
 614                        (double (*)(double, void*)) fcn_mlphylo_xi,
 615                        infptr, 1.e-4);
 616         D->MOD.xi[i] = XI;
 617     }
 618 }
 619
 620 double fcn_mlphylo_edgelength(double l, info *INFO)
 621 {
 622     double loglik;
 623
 624     INFO->D->PHY.el[INFO->i] = l;
 625     lik_dna_tree(INFO->D, &loglik);
 626
 627     return -loglik;
 628 }
 629
 630 void mlphylo_edgelength(int N, DNAdata *D, double *loglik)
 631 /*
 632 optimize branch lengths
 633 */
 634 {
 635     int i;
 636     info INFO, *infptr;
 637     double l;
 638
 639     infptr = &INFO;
 640     INFO.D = D;
 641
 642     for(i = 0; i < N; i++) {
 643         infptr->i = i;
 644         l = Brent_fmin(0.0, 0.1,
 645                        (double (*)(double, void*)) fcn_mlphylo_edgelength,
 646                        infptr, 1.e-6);
 647         D->PHY.el[i] = l;
 648     }
 649 }
 650
 651 void mlphylo_DNAmodel(int *n, int *s, unsigned char *SEQ, double *ANC,
 652                       double *w, int *edge1, int *edge2,
 653                       double *edge_length, int *npart, int *partition,
 654                       int *model, double *xi, double *para, int *npara,
 655                       double *alpha, int *nalpha, int *ncat,
 656                       double *invar, int *ninvar, double *BF,
 657                       int *search_tree, int *fixed, double *loglik)
 658 /*
 659 This function iterates to find the MLEs of the substitution
 660 paramaters and of the branch lengths for a given tree.
 661 */
 662 {
 663         DNAdata *D, data;
 664
 665         D = &data;
 666
 667         D->X.n = n;
 668         D->X.s = s;
 669         D->X.w = w;
 670         D->X.seq = SEQ;
 671         D->X.anc = ANC;
 672
 673         D->PHY.edge1 = edge1;
 674         D->PHY.edge2 = edge2;
 675         D->PHY.el = edge_length;
 676
 677         D->MOD.npart = npart;
 678         D->MOD.partition = partition;
 679         D->MOD.model = model;
 680         D->MOD.xi = xi;
 681         D->MOD.para = para;
 682         D->MOD.npara = npara;
 683         D->MOD.alpha = alpha;
 684         D->MOD.nalpha = nalpha;
 685         D->MOD.ncat = ncat;
 686         D->MOD.invar = invar;
 687         D->MOD.ninvar = ninvar;
 688
 689         D->BF = BF;
 690
 691         lik_dna_tree(D, loglik);
 692         if (! *fixed) {
 693                 if (*npart > 1) mlphylo_xi(*npart - 1, D, loglik);
 694                 if (*npara) mlphylo_para(*npara, D, loglik);
 695                 if (*nalpha) mlphylo_gamma(*nalpha, D, loglik);
 696                 if (*ninvar) mlphylo_invar(*ninvar, D, loglik);
 697                 lik_dna_tree(D, loglik);
 698         }
 699
 700 } /* mlphylo_DNAmodel */
 701 /*
 702 void jc69(double *P, double *t, double *u)
 703 {
 704         PMAT_JC69(*t, *u, P);
 705 }
 706
 707 void k80(double *P, double *t, double *u)
 708 {
 709         PMAT_K80(*t, u[0], u[1], P);
 710 }
 711 */