1 /* me_ols.c 2012-04-30 */
3 /* Copyright 2007 Vincent Lefort
4 GMEsplitEdge() modified by Emmanuel Paradis */
6 /* This file is part of the R-package `ape'. */
7 /* See the file ../COPYING for licensing issues. */
12 void fillTableUp(edge *e, edge *f, double **A, double **D, tree *T);
14 /*OLSint and OLSext use the average distance array to calculate weights
15 instead of using the edge average weight fields*/
17 void OLSext(edge *e, double **A)
23 e->distance = 0.5*(A[e->head->index][e->tail->index]
24 + A[e->head->index][f->head->index]
25 - A[f->head->index][e->tail->index]);
29 f = e->head->leftEdge;
30 g = e->head->rightEdge;
31 e->distance = 0.5*(A[e->head->index][f->head->index]
32 + A[e->head->index][g->head->index]
33 - A[f->head->index][g->head->index]);
37 double wf(double lambda, double D_LR, double D_LU, double D_LD,
38 double D_RU, double D_RD, double D_DU)
41 weight = 0.5*(lambda*(D_LU + D_RD) + (1 -lambda)*(D_LD + D_RU)
46 void OLSint(edge *e, double **A)
49 edge *left, *right, *sib;
50 left = e->head->leftEdge;
51 right = e->head->rightEdge;
53 lambda = ((double) sib->bottomsize*left->bottomsize +
54 right->bottomsize*e->tail->parentEdge->topsize) /
55 (e->bottomsize*e->topsize);
56 e->distance = wf(lambda,A[left->head->index][right->head->index],
57 A[left->head->index][e->tail->index],
58 A[left->head->index][sib->head->index],
59 A[right->head->index][e->tail->index],
60 A[right->head->index][sib->head->index],
61 A[sib->head->index][e->tail->index]);
65 void assignOLSWeights(tree *T, double **A)
68 e = depthFirstTraverse(T,NULL);
70 if ((leaf(e->head)) || (leaf(e->tail)))
74 e = depthFirstTraverse(T,e);
78 /*makes table of average distances from scratch*/
79 void makeOLSAveragesTable(tree *T, double **D, double **A)
84 e = depthFirstTraverse(T,e);
88 exclude = e->tail->parentEdge;
89 /*we want to calculate A[e->head][f->head] for all edges
90 except those edges which are ancestral to e. For those
91 edges, we will calculate A[e->head][f->head] to have a
92 different meaning, later*/
99 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2];
102 g = f->head->leftEdge;
103 h = f->head->rightEdge;
104 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize;
107 else /*exclude == f*/
108 exclude = exclude->tail->parentEdge;
109 f = depthFirstTraverse(T,f);
112 /*e->head is not a leaf, so we go recursively to values calculated for
118 g = e->head->leftEdge;
119 h = e->head->rightEdge;
120 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[f->head->index][g->head->index] + h->bottomsize*A[f->head->index][h->head->index])/e->bottomsize;
123 exclude = exclude->tail->parentEdge;
124 f = depthFirstTraverse(T,f);
127 /*now we move to fill up the rest of the table: we want
128 A[e->head->index][f->head->index] for those cases where e is an
129 ancestor of f, or vice versa. We'll do this by choosing e via a
130 depth first-search, and the recursing for f up the path to the
132 f = e->tail->parentEdge;
134 fillTableUp(e,f,A,D,T);
135 e = depthFirstTraverse(T,e);
138 /*we are indexing this table by vertex indices, but really the
139 underlying object is the edge set. Thus, the array is one element
140 too big in each direction, but we'll ignore the entries involving the root,
141 and instead refer to each edge by the head of that edge. The head of
142 the root points to the edge ancestral to the rest of the tree, so
143 we'll keep track of up distances by pointing to that head*/
145 /*10/13/2001: collapsed three depth-first searces into 1*/
148 void GMEcalcDownAverage(node *v, edge *e, double **D, double **A)
152 A[e->head->index][v->index] = D[v->index2][e->head->index2];
155 left = e->head->leftEdge;
156 right = e->head->rightEdge;
157 A[e->head->index][v->index] =
158 ( left->bottomsize * A[left->head->index][v->index] +
159 right->bottomsize * A[right->head->index][v->index])
164 void GMEcalcUpAverage(node *v, edge *e, double **D, double **A)
167 if (NULL == e->tail->parentEdge)
168 A[v->index][e->head->index] = D[v->index2][e->tail->index2];
171 up = e->tail->parentEdge;
172 down = siblingEdge(e);
173 A[v->index][e->head->index] =
174 (up->topsize * A[v->index][up->head->index] +
175 down->bottomsize * A[down->head->index][v->index])
180 /*this function calculates average distance D_Xv for each X which is
181 a set of leaves of an induced subtree of T*/
182 void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A)
185 /*depth-first search*/
188 e = depthFirstTraverse(T,e); /*the downward averages need to be
189 calculated from bottom to top */
192 GMEcalcDownAverage(v,e,D,A);
193 e = depthFirstTraverse(T,e);
196 e = topFirstTraverse(T,e); /*the upward averages need to be calculated
197 from top to bottom */
200 GMEcalcUpAverage(v,e,D,A);
201 e = topFirstTraverse(T,e);
205 double wf4(double lambda, double lambda2, double D_AB, double D_AC,
206 double D_BC, double D_Av, double D_Bv, double D_Cv)
208 return(((1 - lambda) * (D_AC + D_Bv) + (lambda2 - 1)*(D_AB + D_Cv)
209 + (lambda - lambda2)*(D_BC + D_Av)));
213 /*testEdge cacluates what the OLS weight would be if v were inserted into
214 T along e. Compare against known values for inserting along
216 /*edges are tested by a top-first, left-first scheme. we presume
217 all distances are fixed to the correct weight for
218 e->parentEdge, if e is a left-oriented edge*/
219 void testEdge(edge *e, node *v, double **A)
221 double lambda, lambda2;
223 sib = siblingEdge(e);
224 par = e->tail->parentEdge;
225 /*C is set above e->tail, B is set below e, and A is set below sib*/
226 /*following the nomenclature of Desper & Gascuel*/
227 lambda = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
228 / ((1 + par->topsize)*(par->bottomsize)));
229 lambda2 = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
230 / ((1 + e->bottomsize)*(e->topsize)));
231 e->totalweight = par->totalweight
232 + wf4(lambda,lambda2,A[e->head->index][sib->head->index],
233 A[sib->head->index][e->tail->index],
234 A[e->head->index][e->tail->index],
235 A[sib->head->index][v->index],A[e->head->index][v->index],
236 A[v->index][e->tail->index]);
239 void printDoubleTable(double **A, int d)
245 Rprintf("%lf ", A[i][j]);
250 void GMEsplitEdge(tree *T, node *v, edge *e, double **A);
252 tree *GMEaddSpecies(tree *T,node *v, double **D, double **A)
253 /*the key function of the program addSpeices inserts
254 the node v to the tree T. It uses testEdge to see what the
255 weight would be if v split a particular edge. Weights are assigned by
259 edge *e; /*loop variable*/
260 edge *e_min; /*points to best edge seen thus far*/
261 double w_min = 0.0; /*used to keep track of tree weights*/
264 printf("Adding %s.\n",v->label);*/
266 /*initialize variables as necessary*/
267 /*CASE 1: T is empty, v is the first node*/
268 if (NULL == T) /*create a tree with v as only vertex, no edges*/
272 /*note that we are rooting T arbitrarily at a leaf.
273 T->root is not the phylogenetic root*/
278 /*CASE 2: T is a single-vertex tree*/
282 e = makeEdge("",T->root,v,0.0);
283 //sprintf(e->label,"E1");
284 snprintf(e->label,EDGE_LABEL_LENGTH,"E1");
287 A[v->index][v->index] = D[v->index2][T->root->index2];
288 T->root->leftEdge = v->parentEdge = e;
292 /*CASE 3: T has at least two nodes and an edge. Insert new node
293 by breaking one of the edges*/
296 /*if (!(T->size % 100))
297 printf("T->size is %d\n",T->size);*/
298 GMEcalcNewvAverages(T,v,D,A);
299 /*calcNewvAverges will assign values to all the edge averages of T which
300 include the node v. Will do so using pre-existing averages in T and
301 information from A,D*/
302 e_min = T->root->leftEdge;
303 e = e_min->head->leftEdge;
307 /*testEdge tests weight of tree if loop variable
308 e is the edge split, places this weight in e->totalweight field */
309 if (e->totalweight < w_min)
312 w_min = e->totalweight;
314 e = topFirstTraverse(T,e);
316 /*e_min now points at the edge we want to split*/
317 GMEsplitEdge(T,v,e_min,A);
321 void updateSubTreeAverages(double **A, edge *e, node *v, int direction);
323 /*the ME version of updateAveragesMatrix does not update the entire matrix
324 A, but updates A[v->index][w->index] whenever this represents an average
325 of 1-distant or 2-distant subtrees*/
327 void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode)
329 edge *sib, *par, *left, *right;
330 sib = siblingEdge(e);
331 left = e->head->leftEdge;
332 right = e->head->rightEdge;
333 par = e->tail->parentEdge;
335 /*we need to update the matrix A so all 1-distant, 2-distant, and
336 3-distant averages are correct*/
338 /*first, initialize the newNode entries*/
340 A[newNode->index][newNode->index] =
341 (e->bottomsize*A[e->head->index][e->head->index]
342 + A[v->index][e->head->index])
343 / (e->bottomsize + 1);
345 A[v->index][v->index] =
346 (e->bottomsize*A[e->head->index][v->index]
347 + e->topsize*A[v->index][e->head->index])
348 / (e->bottomsize + e->topsize);
350 /*2-distant for v,newNode*/
351 A[v->index][newNode->index] = A[newNode->index][v->index] =
352 A[v->index][e->head->index];
354 /*second 2-distant for newNode*/
355 A[newNode->index][e->tail->index] = A[e->tail->index][newNode->index]
356 = (e->bottomsize*A[e->head->index][e->tail->index]
357 + A[v->index][e->tail->index])/(e->bottomsize + 1);
358 /*third 2-distant for newNode*/
359 A[newNode->index][e->head->index] = A[e->head->index][newNode->index]
360 = A[e->head->index][e->head->index];
364 /*fourth and last 2-distant for newNode*/
365 A[newNode->index][sib->head->index] =
366 A[sib->head->index][newNode->index] =
367 (e->bottomsize*A[sib->head->index][e->head->index]
368 + A[sib->head->index][v->index]) / (e->bottomsize + 1);
369 updateSubTreeAverages(A,sib,v,SKEW); /*updates sib and below*/
373 if (e->tail->leftEdge == e)
374 updateSubTreeAverages(A,par,v,LEFT); /*updates par and above*/
376 updateSubTreeAverages(A,par,v,RIGHT);
379 updateSubTreeAverages(A,left,v,UP); /*updates left and below*/
381 updateSubTreeAverages(A,right,v,UP); /*updates right and below*/
383 /*1-dist for e->head*/
384 A[e->head->index][e->head->index] =
385 (e->topsize*A[e->head->index][e->head->index]
386 + A[e->head->index][v->index]) / (e->topsize+1);
387 /*2-dist for e->head (v,newNode,left,right)
388 taken care of elsewhere*/
389 /*3-dist with e->head either taken care of elsewhere (below)
390 or unchanged (sib,e->tail)*/
392 /*symmetrize the matrix (at least for distant-2 subtrees) */
393 A[v->index][e->head->index] = A[e->head->index][v->index];
394 /*and distant-3 subtrees*/
395 A[e->tail->index][v->index] = A[v->index][e->tail->index];
397 A[v->index][left->head->index] = A[left->head->index][v->index];
399 A[v->index][right->head->index] = A[right->head->index][v->index];
401 A[v->index][sib->head->index] = A[sib->head->index][v->index];
405 void GMEsplitEdge(tree *T, node *v, edge *e, double **A)
407 int nodelabel = 0;//char nodelabel[NODE_LABEL_LENGTH];
408 char edgelabel[EDGE_LABEL_LENGTH];
409 edge *newPendantEdge;
410 edge *newInternalEdge;
413 //snprintf(nodelabel,1,"");
414 newNode = makeNewNode(nodelabel,T->size + 1);
416 //sprintf(edgelabel,"E%d",T->size);
417 snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size);
418 newPendantEdge = makeEdge(edgelabel,newNode,v,0.0);
420 //sprintf(edgelabel,"E%d",T->size+1);
421 snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size+1);
422 newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0);
425 printf("Inserting node %s on edge %s between nodes %s and %s.\n",
426 v->label,e->label,e->tail->label,e->head->label);*/
427 /*update the matrix of average distances*/
428 /*also updates the bottomsize, topsize fields*/
430 GMEupdateAveragesMatrix(A,e,v,newNode);
432 newNode->parentEdge = e;
433 e->head->parentEdge = newInternalEdge;
434 v->parentEdge = newPendantEdge;
437 T->size = T->size + 2;
439 if (e->tail->leftEdge == e)
441 newNode->leftEdge = newInternalEdge;
442 newNode->rightEdge = newPendantEdge;
446 newNode->leftEdge = newInternalEdge;
447 newNode->rightEdge = newPendantEdge;
450 /*assign proper topsize, bottomsize values to the two new Edges*/
452 newPendantEdge->bottomsize = 1;
453 newPendantEdge->topsize = e->bottomsize + e->topsize;
455 newInternalEdge->bottomsize = e->bottomsize;
456 newInternalEdge->topsize = e->topsize; /*off by one, but we adjust
459 /*and increment these fields for all other edges*/
460 updateSizes(newInternalEdge,UP);
464 void updateSubTreeAverages(double **A, edge *e, node *v, int direction)
465 /*the monster function of this program*/
467 edge *sib, *left, *right, *par;
468 left = e->head->leftEdge;
469 right = e->head->rightEdge;
470 sib = siblingEdge(e);
471 par = e->tail->parentEdge;
474 /*want to preserve correctness of
475 all 1-distant, 2-distant, and 3-distant averages*/
476 /*1-distant updated at edge splitting the two trees*/
478 (left->head,right->head) and (head,tail) updated at
479 a given edge. Note, NOT updating (head,sib->head)!
480 (That would lead to multiple updating).*/
481 /*3-distant updated: at edge in center of quartet*/
482 case UP: /*point of insertion is above e*/
483 /*1-distant average of nodes below e to
485 A[e->head->index][e->head->index] =
486 (e->topsize*A[e->head->index][e->head->index] +
487 A[e->head->index][v->index])/(e->topsize + 1);
488 /*2-distant average of nodes below e to
489 nodes above parent of e*/
490 A[e->head->index][par->head->index] =
491 A[par->head->index][e->head->index] =
492 (par->topsize*A[par->head->index][e->head->index]
493 + A[e->head->index][v->index]) / (par->topsize + 1);
494 /*must do both 3-distant averages involving par*/
497 updateSubTreeAverages(A, left, v, UP); /*and recursive call*/
498 /*3-distant average*/
499 A[par->head->index][left->head->index]
500 = A[left->head->index][par->head->index]
501 = (par->topsize*A[par->head->index][left->head->index]
502 + A[left->head->index][v->index]) / (par->topsize + 1);
506 updateSubTreeAverages(A, right, v, UP);
507 A[par->head->index][right->head->index]
508 = A[right->head->index][par->head->index]
509 = (par->topsize*A[par->head->index][right->head->index]
510 + A[right->head->index][v->index]) / (par->topsize + 1);
513 case SKEW: /*point of insertion is skew to e*/
514 /*1-distant average of nodes below e to
516 A[e->head->index][e->head->index] =
517 (e->topsize*A[e->head->index][e->head->index] +
518 A[e->head->index][v->index])/(e->topsize + 1);
519 /*no 2-distant averages to update in this case*/
520 /*updating 3-distant averages involving sib*/
523 updateSubTreeAverages(A, left, v, UP);
524 A[sib->head->index][left->head->index]
525 = A[left->head->index][sib->head->index]
526 = (sib->bottomsize*A[sib->head->index][left->head->index]
527 + A[left->head->index][v->index]) / (sib->bottomsize + 1);
531 updateSubTreeAverages(A, right, v, UP);
532 A[sib->head->index][right->head->index]
533 = A[right->head->index][sib->head->index]
534 = (sib->bottomsize*A[par->head->index][right->head->index]
535 + A[right->head->index][v->index]) / (sib->bottomsize + 1);
540 case LEFT: /*point of insertion is below the edge left*/
541 /*1-distant average*/
542 A[e->head->index][e->head->index] =
543 (e->bottomsize*A[e->head->index][e->head->index] +
544 A[v->index][e->head->index])/(e->bottomsize + 1);
545 /*2-distant averages*/
546 A[e->head->index][e->tail->index] =
547 A[e->tail->index][e->head->index] =
548 (e->bottomsize*A[e->head->index][e->tail->index] +
549 A[v->index][e->tail->index])/(e->bottomsize + 1);
550 A[left->head->index][right->head->index] =
551 A[right->head->index][left->head->index] =
552 (left->bottomsize*A[right->head->index][left->head->index]
553 + A[right->head->index][v->index]) / (left->bottomsize+1);
554 /*3-distant avereages involving left*/
557 updateSubTreeAverages(A, sib, v, SKEW);
558 A[left->head->index][sib->head->index]
559 = A[sib->head->index][left->head->index]
560 = (left->bottomsize*A[left->head->index][sib->head->index]
561 + A[sib->head->index][v->index]) / (left->bottomsize + 1);
565 if (e->tail->leftEdge == e)
566 updateSubTreeAverages(A, par, v, LEFT);
568 updateSubTreeAverages(A, par, v, RIGHT);
569 A[left->head->index][par->head->index]
570 = A[par->head->index][left->head->index]
571 = (left->bottomsize*A[left->head->index][par->head->index]
572 + A[v->index][par->head->index]) / (left->bottomsize + 1);
575 case RIGHT: /*point of insertion is below the edge right*/
576 /*1-distant average*/
577 A[e->head->index][e->head->index] =
578 (e->bottomsize*A[e->head->index][e->head->index] +
579 A[v->index][e->head->index])/(e->bottomsize + 1);
580 /*2-distant averages*/
581 A[e->head->index][e->tail->index] =
582 A[e->tail->index][e->head->index] =
583 (e->bottomsize*A[e->head->index][e->tail->index] +
584 A[v->index][e->tail->index])/(e->bottomsize + 1);
585 A[left->head->index][right->head->index] =
586 A[right->head->index][left->head->index] =
587 (right->bottomsize*A[right->head->index][left->head->index]
588 + A[left->head->index][v->index]) / (right->bottomsize+1);
589 /*3-distant avereages involving right*/
592 updateSubTreeAverages(A, sib, v, SKEW);
593 A[right->head->index][sib->head->index]
594 = A[sib->head->index][right->head->index]
595 = (right->bottomsize*A[right->head->index][sib->head->index]
596 + A[sib->head->index][v->index]) / (right->bottomsize + 1);
600 if (e->tail->leftEdge == e)
601 updateSubTreeAverages(A, par, v, LEFT);
603 updateSubTreeAverages(A, par, v, RIGHT);
604 A[right->head->index][par->head->index]
605 = A[par->head->index][right->head->index]
606 = (right->bottomsize*A[right->head->index][par->head->index]
607 + A[v->index][par->head->index]) / (right->bottomsize + 1);
614 void assignBottomsize(edge *e)
620 assignBottomsize(e->head->leftEdge);
621 assignBottomsize(e->head->rightEdge);
622 e->bottomsize = e->head->leftEdge->bottomsize
623 + e->head->rightEdge->bottomsize;
627 void assignTopsize(edge *e, int numLeaves)
631 e->topsize = numLeaves - e->bottomsize;
632 assignTopsize(e->head->leftEdge,numLeaves);
633 assignTopsize(e->head->rightEdge,numLeaves);
637 void assignAllSizeFields(tree *T)
639 assignBottomsize(T->root->leftEdge);
640 assignTopsize(T->root->leftEdge,T->size/2 + 1);