4 void fillTableUp(edge *e, edge *f, double **A, double **D, tree *T);
6 /*OLSint and OLSext use the average distance array to calculate weights
7 instead of using the edge average weight fields*/
9 void OLSext(edge *e, double **A)
15 e->distance = 0.5*(A[e->head->index][e->tail->index]
16 + A[e->head->index][f->head->index]
17 - A[f->head->index][e->tail->index]);
21 f = e->head->leftEdge;
22 g = e->head->rightEdge;
23 e->distance = 0.5*(A[e->head->index][f->head->index]
24 + A[e->head->index][g->head->index]
25 - A[f->head->index][g->head->index]);
29 double wf(double lambda, double D_LR, double D_LU, double D_LD,
30 double D_RU, double D_RD, double D_DU)
33 weight = 0.5*(lambda*(D_LU + D_RD) + (1 -lambda)*(D_LD + D_RU)
38 void OLSint(edge *e, double **A)
41 edge *left, *right, *sib;
42 left = e->head->leftEdge;
43 right = e->head->rightEdge;
45 lambda = ((double) sib->bottomsize*left->bottomsize +
46 right->bottomsize*e->tail->parentEdge->topsize) /
47 (e->bottomsize*e->topsize);
48 e->distance = wf(lambda,A[left->head->index][right->head->index],
49 A[left->head->index][e->tail->index],
50 A[left->head->index][sib->head->index],
51 A[right->head->index][e->tail->index],
52 A[right->head->index][sib->head->index],
53 A[sib->head->index][e->tail->index]);
57 void assignOLSWeights(tree *T, double **A)
60 e = depthFirstTraverse(T,NULL);
62 if ((leaf(e->head)) || (leaf(e->tail)))
66 e = depthFirstTraverse(T,e);
70 /*makes table of average distances from scratch*/
71 void makeOLSAveragesTable(tree *T, double **D, double **A)
76 e = depthFirstTraverse(T,e);
80 exclude = e->tail->parentEdge;
81 /*we want to calculate A[e->head][f->head] for all edges
82 except those edges which are ancestral to e. For those
83 edges, we will calculate A[e->head][f->head] to have a
84 different meaning, later*/
91 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2];
94 g = f->head->leftEdge;
95 h = f->head->rightEdge;
96 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize;
100 exclude = exclude->tail->parentEdge;
101 f = depthFirstTraverse(T,f);
104 /*e->head is not a leaf, so we go recursively to values calculated for
110 g = e->head->leftEdge;
111 h = e->head->rightEdge;
112 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[f->head->index][g->head->index] + h->bottomsize*A[f->head->index][h->head->index])/e->bottomsize;
115 exclude = exclude->tail->parentEdge;
116 f = depthFirstTraverse(T,f);
119 /*now we move to fill up the rest of the table: we want
120 A[e->head->index][f->head->index] for those cases where e is an
121 ancestor of f, or vice versa. We'll do this by choosing e via a
122 depth first-search, and the recursing for f up the path to the
124 f = e->tail->parentEdge;
126 fillTableUp(e,f,A,D,T);
127 e = depthFirstTraverse(T,e);
130 /*we are indexing this table by vertex indices, but really the
131 underlying object is the edge set. Thus, the array is one element
132 too big in each direction, but we'll ignore the entries involving the root,
133 and instead refer to each edge by the head of that edge. The head of
134 the root points to the edge ancestral to the rest of the tree, so
135 we'll keep track of up distances by pointing to that head*/
137 /*10/13/2001: collapsed three depth-first searces into 1*/
140 void GMEcalcDownAverage(node *v, edge *e, double **D, double **A)
144 A[e->head->index][v->index] = D[v->index2][e->head->index2];
147 left = e->head->leftEdge;
148 right = e->head->rightEdge;
149 A[e->head->index][v->index] =
150 ( left->bottomsize * A[left->head->index][v->index] +
151 right->bottomsize * A[right->head->index][v->index])
156 void GMEcalcUpAverage(node *v, edge *e, double **D, double **A)
159 if (NULL == e->tail->parentEdge)
160 A[v->index][e->head->index] = D[v->index2][e->tail->index2];
163 up = e->tail->parentEdge;
164 down = siblingEdge(e);
165 A[v->index][e->head->index] =
166 (up->topsize * A[v->index][up->head->index] +
167 down->bottomsize * A[down->head->index][v->index])
172 /*this function calculates average distance D_Xv for each X which is
173 a set of leaves of an induced subtree of T*/
174 void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A)
177 /*depth-first search*/
180 e = depthFirstTraverse(T,e); /*the downward averages need to be
181 calculated from bottom to top */
184 GMEcalcDownAverage(v,e,D,A);
185 e = depthFirstTraverse(T,e);
188 e = topFirstTraverse(T,e); /*the upward averages need to be calculated
189 from top to bottom */
192 GMEcalcUpAverage(v,e,D,A);
193 e = topFirstTraverse(T,e);
197 double wf4(double lambda, double lambda2, double D_AB, double D_AC,
198 double D_BC, double D_Av, double D_Bv, double D_Cv)
200 return(((1 - lambda) * (D_AC + D_Bv) + (lambda2 - 1)*(D_AB + D_Cv)
201 + (lambda - lambda2)*(D_BC + D_Av)));
205 /*testEdge cacluates what the OLS weight would be if v were inserted into
206 T along e. Compare against known values for inserting along
208 /*edges are tested by a top-first, left-first scheme. we presume
209 all distances are fixed to the correct weight for
210 e->parentEdge, if e is a left-oriented edge*/
211 void testEdge(edge *e, node *v, double **A)
213 double lambda, lambda2;
215 sib = siblingEdge(e);
216 par = e->tail->parentEdge;
217 /*C is set above e->tail, B is set below e, and A is set below sib*/
218 /*following the nomenclature of Desper & Gascuel*/
219 lambda = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
220 / ((1 + par->topsize)*(par->bottomsize)));
221 lambda2 = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
222 / ((1 + e->bottomsize)*(e->topsize)));
223 e->totalweight = par->totalweight
224 + wf4(lambda,lambda2,A[e->head->index][sib->head->index],
225 A[sib->head->index][e->tail->index],
226 A[e->head->index][e->tail->index],
227 A[sib->head->index][v->index],A[e->head->index][v->index],
228 A[v->index][e->tail->index]);
231 void printDoubleTable(double **A, int d)
237 printf("%lf ", A[i][j]);
242 void GMEsplitEdge(tree *T, node *v, edge *e, double **A);
244 tree *GMEaddSpecies(tree *T,node *v, double **D, double **A)
245 /*the key function of the program addSpeices inserts
246 the node v to the tree T. It uses testEdge to see what the
247 weight would be if v split a particular edge. Weights are assigned by
251 edge *e; /*loop variable*/
252 edge *e_min; /*points to best edge seen thus far*/
253 double w_min = 0.0; /*used to keep track of tree weights*/
256 printf("Adding %s.\n",v->label);*/
258 /*initialize variables as necessary*/
259 /*CASE 1: T is empty, v is the first node*/
260 if (NULL == T) /*create a tree with v as only vertex, no edges*/
264 /*note that we are rooting T arbitrarily at a leaf.
265 T->root is not the phylogenetic root*/
270 /*CASE 2: T is a single-vertex tree*/
274 e = makeEdge("",T->root,v,0.0);
275 //sprintf(e->label,"E1");
276 snprintf(e->label,EDGE_LABEL_LENGTH,"E1");
279 A[v->index][v->index] = D[v->index2][T->root->index2];
280 T->root->leftEdge = v->parentEdge = e;
284 /*CASE 3: T has at least two nodes and an edge. Insert new node
285 by breaking one of the edges*/
288 /*if (!(T->size % 100))
289 printf("T->size is %d\n",T->size);*/
290 GMEcalcNewvAverages(T,v,D,A);
291 /*calcNewvAverges will assign values to all the edge averages of T which
292 include the node v. Will do so using pre-existing averages in T and
293 information from A,D*/
294 e_min = T->root->leftEdge;
295 e = e_min->head->leftEdge;
299 /*testEdge tests weight of tree if loop variable
300 e is the edge split, places this weight in e->totalweight field */
301 if (e->totalweight < w_min)
304 w_min = e->totalweight;
306 e = topFirstTraverse(T,e);
308 /*e_min now points at the edge we want to split*/
309 GMEsplitEdge(T,v,e_min,A);
313 void updateSubTreeAverages(double **A, edge *e, node *v, int direction);
315 /*the ME version of updateAveragesMatrix does not update the entire matrix
316 A, but updates A[v->index][w->index] whenever this represents an average
317 of 1-distant or 2-distant subtrees*/
319 void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode)
321 edge *sib, *par, *left, *right;
322 sib = siblingEdge(e);
323 left = e->head->leftEdge;
324 right = e->head->rightEdge;
325 par = e->tail->parentEdge;
327 /*we need to update the matrix A so all 1-distant, 2-distant, and
328 3-distant averages are correct*/
330 /*first, initialize the newNode entries*/
332 A[newNode->index][newNode->index] =
333 (e->bottomsize*A[e->head->index][e->head->index]
334 + A[v->index][e->head->index])
335 / (e->bottomsize + 1);
337 A[v->index][v->index] =
338 (e->bottomsize*A[e->head->index][v->index]
339 + e->topsize*A[v->index][e->head->index])
340 / (e->bottomsize + e->topsize);
342 /*2-distant for v,newNode*/
343 A[v->index][newNode->index] = A[newNode->index][v->index] =
344 A[v->index][e->head->index];
346 /*second 2-distant for newNode*/
347 A[newNode->index][e->tail->index] = A[e->tail->index][newNode->index]
348 = (e->bottomsize*A[e->head->index][e->tail->index]
349 + A[v->index][e->tail->index])/(e->bottomsize + 1);
350 /*third 2-distant for newNode*/
351 A[newNode->index][e->head->index] = A[e->head->index][newNode->index]
352 = A[e->head->index][e->head->index];
356 /*fourth and last 2-distant for newNode*/
357 A[newNode->index][sib->head->index] =
358 A[sib->head->index][newNode->index] =
359 (e->bottomsize*A[sib->head->index][e->head->index]
360 + A[sib->head->index][v->index]) / (e->bottomsize + 1);
361 updateSubTreeAverages(A,sib,v,SKEW); /*updates sib and below*/
365 if (e->tail->leftEdge == e)
366 updateSubTreeAverages(A,par,v,LEFT); /*updates par and above*/
368 updateSubTreeAverages(A,par,v,RIGHT);
371 updateSubTreeAverages(A,left,v,UP); /*updates left and below*/
373 updateSubTreeAverages(A,right,v,UP); /*updates right and below*/
375 /*1-dist for e->head*/
376 A[e->head->index][e->head->index] =
377 (e->topsize*A[e->head->index][e->head->index]
378 + A[e->head->index][v->index]) / (e->topsize+1);
379 /*2-dist for e->head (v,newNode,left,right)
380 taken care of elsewhere*/
381 /*3-dist with e->head either taken care of elsewhere (below)
382 or unchanged (sib,e->tail)*/
384 /*symmetrize the matrix (at least for distant-2 subtrees) */
385 A[v->index][e->head->index] = A[e->head->index][v->index];
386 /*and distant-3 subtrees*/
387 A[e->tail->index][v->index] = A[v->index][e->tail->index];
389 A[v->index][left->head->index] = A[left->head->index][v->index];
391 A[v->index][right->head->index] = A[right->head->index][v->index];
393 A[v->index][sib->head->index] = A[sib->head->index][v->index];
397 void GMEsplitEdge(tree *T, node *v, edge *e, double **A)
399 char nodelabel[NODE_LABEL_LENGTH];
400 char edgelabel[EDGE_LABEL_LENGTH];
401 edge *newPendantEdge;
402 edge *newInternalEdge;
405 snprintf(nodelabel,1,"");
406 newNode = makeNewNode(nodelabel,T->size + 1);
408 //sprintf(edgelabel,"E%d",T->size);
409 snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size);
410 newPendantEdge = makeEdge(edgelabel,newNode,v,0.0);
412 //sprintf(edgelabel,"E%d",T->size+1);
413 snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size+1);
414 newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0);
417 printf("Inserting node %s on edge %s between nodes %s and %s.\n",
418 v->label,e->label,e->tail->label,e->head->label);*/
419 /*update the matrix of average distances*/
420 /*also updates the bottomsize, topsize fields*/
422 GMEupdateAveragesMatrix(A,e,v,newNode);
424 newNode->parentEdge = e;
425 e->head->parentEdge = newInternalEdge;
426 v->parentEdge = newPendantEdge;
429 T->size = T->size + 2;
431 if (e->tail->leftEdge == e)
433 newNode->leftEdge = newInternalEdge;
434 newNode->rightEdge = newPendantEdge;
438 newNode->leftEdge = newInternalEdge;
439 newNode->rightEdge = newPendantEdge;
442 /*assign proper topsize, bottomsize values to the two new Edges*/
444 newPendantEdge->bottomsize = 1;
445 newPendantEdge->topsize = e->bottomsize + e->topsize;
447 newInternalEdge->bottomsize = e->bottomsize;
448 newInternalEdge->topsize = e->topsize; /*off by one, but we adjust
451 /*and increment these fields for all other edges*/
452 updateSizes(newInternalEdge,UP);
456 void updateSubTreeAverages(double **A, edge *e, node *v, int direction)
457 /*the monster function of this program*/
459 edge *sib, *left, *right, *par;
460 left = e->head->leftEdge;
461 right = e->head->rightEdge;
462 sib = siblingEdge(e);
463 par = e->tail->parentEdge;
466 /*want to preserve correctness of
467 all 1-distant, 2-distant, and 3-distant averages*/
468 /*1-distant updated at edge splitting the two trees*/
470 (left->head,right->head) and (head,tail) updated at
471 a given edge. Note, NOT updating (head,sib->head)!
472 (That would lead to multiple updating).*/
473 /*3-distant updated: at edge in center of quartet*/
474 case UP: /*point of insertion is above e*/
475 /*1-distant average of nodes below e to
477 A[e->head->index][e->head->index] =
478 (e->topsize*A[e->head->index][e->head->index] +
479 A[e->head->index][v->index])/(e->topsize + 1);
480 /*2-distant average of nodes below e to
481 nodes above parent of e*/
482 A[e->head->index][par->head->index] =
483 A[par->head->index][e->head->index] =
484 (par->topsize*A[par->head->index][e->head->index]
485 + A[e->head->index][v->index]) / (par->topsize + 1);
486 /*must do both 3-distant averages involving par*/
489 updateSubTreeAverages(A, left, v, UP); /*and recursive call*/
490 /*3-distant average*/
491 A[par->head->index][left->head->index]
492 = A[left->head->index][par->head->index]
493 = (par->topsize*A[par->head->index][left->head->index]
494 + A[left->head->index][v->index]) / (par->topsize + 1);
498 updateSubTreeAverages(A, right, v, UP);
499 A[par->head->index][right->head->index]
500 = A[right->head->index][par->head->index]
501 = (par->topsize*A[par->head->index][right->head->index]
502 + A[right->head->index][v->index]) / (par->topsize + 1);
505 case SKEW: /*point of insertion is skew to e*/
506 /*1-distant average of nodes below e to
508 A[e->head->index][e->head->index] =
509 (e->topsize*A[e->head->index][e->head->index] +
510 A[e->head->index][v->index])/(e->topsize + 1);
511 /*no 2-distant averages to update in this case*/
512 /*updating 3-distant averages involving sib*/
515 updateSubTreeAverages(A, left, v, UP);
516 A[sib->head->index][left->head->index]
517 = A[left->head->index][sib->head->index]
518 = (sib->bottomsize*A[sib->head->index][left->head->index]
519 + A[left->head->index][v->index]) / (sib->bottomsize + 1);
523 updateSubTreeAverages(A, right, v, UP);
524 A[sib->head->index][right->head->index]
525 = A[right->head->index][sib->head->index]
526 = (sib->bottomsize*A[par->head->index][right->head->index]
527 + A[right->head->index][v->index]) / (sib->bottomsize + 1);
532 case LEFT: /*point of insertion is below the edge left*/
533 /*1-distant average*/
534 A[e->head->index][e->head->index] =
535 (e->bottomsize*A[e->head->index][e->head->index] +
536 A[v->index][e->head->index])/(e->bottomsize + 1);
537 /*2-distant averages*/
538 A[e->head->index][e->tail->index] =
539 A[e->tail->index][e->head->index] =
540 (e->bottomsize*A[e->head->index][e->tail->index] +
541 A[v->index][e->tail->index])/(e->bottomsize + 1);
542 A[left->head->index][right->head->index] =
543 A[right->head->index][left->head->index] =
544 (left->bottomsize*A[right->head->index][left->head->index]
545 + A[right->head->index][v->index]) / (left->bottomsize+1);
546 /*3-distant avereages involving left*/
549 updateSubTreeAverages(A, sib, v, SKEW);
550 A[left->head->index][sib->head->index]
551 = A[sib->head->index][left->head->index]
552 = (left->bottomsize*A[left->head->index][sib->head->index]
553 + A[sib->head->index][v->index]) / (left->bottomsize + 1);
557 if (e->tail->leftEdge == e)
558 updateSubTreeAverages(A, par, v, LEFT);
560 updateSubTreeAverages(A, par, v, RIGHT);
561 A[left->head->index][par->head->index]
562 = A[par->head->index][left->head->index]
563 = (left->bottomsize*A[left->head->index][par->head->index]
564 + A[v->index][par->head->index]) / (left->bottomsize + 1);
567 case RIGHT: /*point of insertion is below the edge right*/
568 /*1-distant average*/
569 A[e->head->index][e->head->index] =
570 (e->bottomsize*A[e->head->index][e->head->index] +
571 A[v->index][e->head->index])/(e->bottomsize + 1);
572 /*2-distant averages*/
573 A[e->head->index][e->tail->index] =
574 A[e->tail->index][e->head->index] =
575 (e->bottomsize*A[e->head->index][e->tail->index] +
576 A[v->index][e->tail->index])/(e->bottomsize + 1);
577 A[left->head->index][right->head->index] =
578 A[right->head->index][left->head->index] =
579 (right->bottomsize*A[right->head->index][left->head->index]
580 + A[left->head->index][v->index]) / (right->bottomsize+1);
581 /*3-distant avereages involving right*/
584 updateSubTreeAverages(A, sib, v, SKEW);
585 A[right->head->index][sib->head->index]
586 = A[sib->head->index][right->head->index]
587 = (right->bottomsize*A[right->head->index][sib->head->index]
588 + A[sib->head->index][v->index]) / (right->bottomsize + 1);
592 if (e->tail->leftEdge == e)
593 updateSubTreeAverages(A, par, v, LEFT);
595 updateSubTreeAverages(A, par, v, RIGHT);
596 A[right->head->index][par->head->index]
597 = A[par->head->index][right->head->index]
598 = (right->bottomsize*A[right->head->index][par->head->index]
599 + A[v->index][par->head->index]) / (right->bottomsize + 1);
606 void assignBottomsize(edge *e)
612 assignBottomsize(e->head->leftEdge);
613 assignBottomsize(e->head->rightEdge);
614 e->bottomsize = e->head->leftEdge->bottomsize
615 + e->head->rightEdge->bottomsize;
619 void assignTopsize(edge *e, int numLeaves)
623 e->topsize = numLeaves - e->bottomsize;
624 assignTopsize(e->head->leftEdge,numLeaves);
625 assignTopsize(e->head->rightEdge,numLeaves);
629 void assignAllSizeFields(tree *T)
631 assignBottomsize(T->root->leftEdge);
632 assignTopsize(T->root->leftEdge,T->size/2 + 1);