7 void fillTableUp(edge *e, edge *f, double **A, double **D, tree *T);
9 /*OLSint and OLSext use the average distance array to calculate weights
10 instead of using the edge average weight fields*/
12 void OLSext(edge *e, double **A)
18 e->distance = 0.5*(A[e->head->index][e->tail->index]
19 + A[e->head->index][f->head->index]
20 - A[f->head->index][e->tail->index]);
24 f = e->head->leftEdge;
25 g = e->head->rightEdge;
26 e->distance = 0.5*(A[e->head->index][f->head->index]
27 + A[e->head->index][g->head->index]
28 - A[f->head->index][g->head->index]);
32 double wf(double lambda, double D_LR, double D_LU, double D_LD,
33 double D_RU, double D_RD, double D_DU)
36 weight = 0.5*(lambda*(D_LU + D_RD) + (1 -lambda)*(D_LD + D_RU)
41 void OLSint(edge *e, double **A)
44 edge *left, *right, *sib;
45 left = e->head->leftEdge;
46 right = e->head->rightEdge;
48 lambda = ((double) sib->bottomsize*left->bottomsize +
49 right->bottomsize*e->tail->parentEdge->topsize) /
50 (e->bottomsize*e->topsize);
51 e->distance = wf(lambda,A[left->head->index][right->head->index],
52 A[left->head->index][e->tail->index],
53 A[left->head->index][sib->head->index],
54 A[right->head->index][e->tail->index],
55 A[right->head->index][sib->head->index],
56 A[sib->head->index][e->tail->index]);
60 void assignOLSWeights(tree *T, double **A)
63 e = depthFirstTraverse(T,NULL);
65 if ((leaf(e->head)) || (leaf(e->tail)))
69 e = depthFirstTraverse(T,e);
73 /*makes table of average distances from scratch*/
74 void makeOLSAveragesTable(tree *T, double **D, double **A)
79 e = depthFirstTraverse(T,e);
83 exclude = e->tail->parentEdge;
84 /*we want to calculate A[e->head][f->head] for all edges
85 except those edges which are ancestral to e. For those
86 edges, we will calculate A[e->head][f->head] to have a
87 different meaning, later*/
94 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2];
97 g = f->head->leftEdge;
98 h = f->head->rightEdge;
99 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize;
102 else /*exclude == f*/
103 exclude = exclude->tail->parentEdge;
104 f = depthFirstTraverse(T,f);
107 /*e->head is not a leaf, so we go recursively to values calculated for
113 g = e->head->leftEdge;
114 h = e->head->rightEdge;
115 A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[f->head->index][g->head->index] + h->bottomsize*A[f->head->index][h->head->index])/e->bottomsize;
118 exclude = exclude->tail->parentEdge;
119 f = depthFirstTraverse(T,f);
122 /*now we move to fill up the rest of the table: we want
123 A[e->head->index][f->head->index] for those cases where e is an
124 ancestor of f, or vice versa. We'll do this by choosing e via a
125 depth first-search, and the recursing for f up the path to the
127 f = e->tail->parentEdge;
129 fillTableUp(e,f,A,D,T);
130 e = depthFirstTraverse(T,e);
133 /*we are indexing this table by vertex indices, but really the
134 underlying object is the edge set. Thus, the array is one element
135 too big in each direction, but we'll ignore the entries involving the root,
136 and instead refer to each edge by the head of that edge. The head of
137 the root points to the edge ancestral to the rest of the tree, so
138 we'll keep track of up distances by pointing to that head*/
140 /*10/13/2001: collapsed three depth-first searces into 1*/
143 void GMEcalcDownAverage(node *v, edge *e, double **D, double **A)
147 A[e->head->index][v->index] = D[v->index2][e->head->index2];
150 left = e->head->leftEdge;
151 right = e->head->rightEdge;
152 A[e->head->index][v->index] =
153 ( left->bottomsize * A[left->head->index][v->index] +
154 right->bottomsize * A[right->head->index][v->index])
159 void GMEcalcUpAverage(node *v, edge *e, double **D, double **A)
162 if (NULL == e->tail->parentEdge)
163 A[v->index][e->head->index] = D[v->index2][e->tail->index2];
166 up = e->tail->parentEdge;
167 down = siblingEdge(e);
168 A[v->index][e->head->index] =
169 (up->topsize * A[v->index][up->head->index] +
170 down->bottomsize * A[down->head->index][v->index])
175 /*this function calculates average distance D_Xv for each X which is
176 a set of leaves of an induced subtree of T*/
177 void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A)
180 /*depth-first search*/
183 e = depthFirstTraverse(T,e); /*the downward averages need to be
184 calculated from bottom to top */
187 GMEcalcDownAverage(v,e,D,A);
188 e = depthFirstTraverse(T,e);
191 e = topFirstTraverse(T,e); /*the upward averages need to be calculated
192 from top to bottom */
195 GMEcalcUpAverage(v,e,D,A);
196 e = topFirstTraverse(T,e);
200 double wf4(double lambda, double lambda2, double D_AB, double D_AC,
201 double D_BC, double D_Av, double D_Bv, double D_Cv)
203 return(((1 - lambda) * (D_AC + D_Bv) + (lambda2 - 1)*(D_AB + D_Cv)
204 + (lambda - lambda2)*(D_BC + D_Av)));
208 /*testEdge cacluates what the OLS weight would be if v were inserted into
209 T along e. Compare against known values for inserting along
211 /*edges are tested by a top-first, left-first scheme. we presume
212 all distances are fixed to the correct weight for
213 e->parentEdge, if e is a left-oriented edge*/
214 void testEdge(edge *e, node *v, double **A)
216 double lambda, lambda2;
218 sib = siblingEdge(e);
219 par = e->tail->parentEdge;
220 /*C is set above e->tail, B is set below e, and A is set below sib*/
221 /*following the nomenclature of Desper & Gascuel*/
222 lambda = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
223 / ((1 + par->topsize)*(par->bottomsize)));
224 lambda2 = (((double) (sib->bottomsize + e->bottomsize*par->topsize))
225 / ((1 + e->bottomsize)*(e->topsize)));
226 e->totalweight = par->totalweight
227 + wf4(lambda,lambda2,A[e->head->index][sib->head->index],
228 A[sib->head->index][e->tail->index],
229 A[e->head->index][e->tail->index],
230 A[sib->head->index][v->index],A[e->head->index][v->index],
231 A[v->index][e->tail->index]);
234 void printDoubleTable(double **A, int d)
240 printf("%lf ", A[i][j]);
245 void GMEsplitEdge(tree *T, node *v, edge *e, double **A);
247 tree *GMEaddSpecies(tree *T,node *v, double **D, double **A)
248 /*the key function of the program addSpeices inserts
249 the node v to the tree T. It uses testEdge to see what the
250 weight would be if v split a particular edge. Weights are assigned by
254 edge *e; /*loop variable*/
255 edge *e_min; /*points to best edge seen thus far*/
256 double w_min = 0.0; /*used to keep track of tree weights*/
259 printf("Adding %s.\n",v->label);*/
261 /*initialize variables as necessary*/
262 /*CASE 1: T is empty, v is the first node*/
263 if (NULL == T) /*create a tree with v as only vertex, no edges*/
267 /*note that we are rooting T arbitrarily at a leaf.
268 T->root is not the phylogenetic root*/
273 /*CASE 2: T is a single-vertex tree*/
277 e = makeEdge("",T->root,v,0.0);
278 //sprintf(e->label,"E1");
279 snprintf(e->label,EDGE_LABEL_LENGTH,"E1");
282 A[v->index][v->index] = D[v->index2][T->root->index2];
283 T->root->leftEdge = v->parentEdge = e;
287 /*CASE 3: T has at least two nodes and an edge. Insert new node
288 by breaking one of the edges*/
291 /*if (!(T->size % 100))
292 printf("T->size is %d\n",T->size);*/
293 GMEcalcNewvAverages(T,v,D,A);
294 /*calcNewvAverges will assign values to all the edge averages of T which
295 include the node v. Will do so using pre-existing averages in T and
296 information from A,D*/
297 e_min = T->root->leftEdge;
298 e = e_min->head->leftEdge;
302 /*testEdge tests weight of tree if loop variable
303 e is the edge split, places this weight in e->totalweight field */
304 if (e->totalweight < w_min)
307 w_min = e->totalweight;
309 e = topFirstTraverse(T,e);
311 /*e_min now points at the edge we want to split*/
312 GMEsplitEdge(T,v,e_min,A);
316 void updateSubTreeAverages(double **A, edge *e, node *v, int direction);
318 /*the ME version of updateAveragesMatrix does not update the entire matrix
319 A, but updates A[v->index][w->index] whenever this represents an average
320 of 1-distant or 2-distant subtrees*/
322 void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode)
324 edge *sib, *par, *left, *right;
325 sib = siblingEdge(e);
326 left = e->head->leftEdge;
327 right = e->head->rightEdge;
328 par = e->tail->parentEdge;
330 /*we need to update the matrix A so all 1-distant, 2-distant, and
331 3-distant averages are correct*/
333 /*first, initialize the newNode entries*/
335 A[newNode->index][newNode->index] =
336 (e->bottomsize*A[e->head->index][e->head->index]
337 + A[v->index][e->head->index])
338 / (e->bottomsize + 1);
340 A[v->index][v->index] =
341 (e->bottomsize*A[e->head->index][v->index]
342 + e->topsize*A[v->index][e->head->index])
343 / (e->bottomsize + e->topsize);
345 /*2-distant for v,newNode*/
346 A[v->index][newNode->index] = A[newNode->index][v->index] =
347 A[v->index][e->head->index];
349 /*second 2-distant for newNode*/
350 A[newNode->index][e->tail->index] = A[e->tail->index][newNode->index]
351 = (e->bottomsize*A[e->head->index][e->tail->index]
352 + A[v->index][e->tail->index])/(e->bottomsize + 1);
353 /*third 2-distant for newNode*/
354 A[newNode->index][e->head->index] = A[e->head->index][newNode->index]
355 = A[e->head->index][e->head->index];
359 /*fourth and last 2-distant for newNode*/
360 A[newNode->index][sib->head->index] =
361 A[sib->head->index][newNode->index] =
362 (e->bottomsize*A[sib->head->index][e->head->index]
363 + A[sib->head->index][v->index]) / (e->bottomsize + 1);
364 updateSubTreeAverages(A,sib,v,SKEW); /*updates sib and below*/
368 if (e->tail->leftEdge == e)
369 updateSubTreeAverages(A,par,v,LEFT); /*updates par and above*/
371 updateSubTreeAverages(A,par,v,RIGHT);
374 updateSubTreeAverages(A,left,v,UP); /*updates left and below*/
376 updateSubTreeAverages(A,right,v,UP); /*updates right and below*/
378 /*1-dist for e->head*/
379 A[e->head->index][e->head->index] =
380 (e->topsize*A[e->head->index][e->head->index]
381 + A[e->head->index][v->index]) / (e->topsize+1);
382 /*2-dist for e->head (v,newNode,left,right)
383 taken care of elsewhere*/
384 /*3-dist with e->head either taken care of elsewhere (below)
385 or unchanged (sib,e->tail)*/
387 /*symmetrize the matrix (at least for distant-2 subtrees) */
388 A[v->index][e->head->index] = A[e->head->index][v->index];
389 /*and distant-3 subtrees*/
390 A[e->tail->index][v->index] = A[v->index][e->tail->index];
392 A[v->index][left->head->index] = A[left->head->index][v->index];
394 A[v->index][right->head->index] = A[right->head->index][v->index];
396 A[v->index][sib->head->index] = A[sib->head->index][v->index];
400 void GMEsplitEdge(tree *T, node *v, edge *e, double **A)
402 char nodelabel[NODE_LABEL_LENGTH];
403 char edgelabel[EDGE_LABEL_LENGTH];
404 edge *newPendantEdge;
405 edge *newInternalEdge;
408 snprintf(nodelabel,1,"");
409 newNode = makeNewNode(nodelabel,T->size + 1);
411 //sprintf(edgelabel,"E%d",T->size);
412 snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size);
413 newPendantEdge = makeEdge(edgelabel,newNode,v,0.0);
415 //sprintf(edgelabel,"E%d",T->size+1);
416 snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size+1);
417 newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0);
420 printf("Inserting node %s on edge %s between nodes %s and %s.\n",
421 v->label,e->label,e->tail->label,e->head->label);*/
422 /*update the matrix of average distances*/
423 /*also updates the bottomsize, topsize fields*/
425 GMEupdateAveragesMatrix(A,e,v,newNode);
427 newNode->parentEdge = e;
428 e->head->parentEdge = newInternalEdge;
429 v->parentEdge = newPendantEdge;
432 T->size = T->size + 2;
434 if (e->tail->leftEdge == e)
436 newNode->leftEdge = newInternalEdge;
437 newNode->rightEdge = newPendantEdge;
441 newNode->leftEdge = newInternalEdge;
442 newNode->rightEdge = newPendantEdge;
445 /*assign proper topsize, bottomsize values to the two new Edges*/
447 newPendantEdge->bottomsize = 1;
448 newPendantEdge->topsize = e->bottomsize + e->topsize;
450 newInternalEdge->bottomsize = e->bottomsize;
451 newInternalEdge->topsize = e->topsize; /*off by one, but we adjust
454 /*and increment these fields for all other edges*/
455 updateSizes(newInternalEdge,UP);
459 void updateSubTreeAverages(double **A, edge *e, node *v, int direction)
460 /*the monster function of this program*/
462 edge *sib, *left, *right, *par;
463 left = e->head->leftEdge;
464 right = e->head->rightEdge;
465 sib = siblingEdge(e);
466 par = e->tail->parentEdge;
469 /*want to preserve correctness of
470 all 1-distant, 2-distant, and 3-distant averages*/
471 /*1-distant updated at edge splitting the two trees*/
473 (left->head,right->head) and (head,tail) updated at
474 a given edge. Note, NOT updating (head,sib->head)!
475 (That would lead to multiple updating).*/
476 /*3-distant updated: at edge in center of quartet*/
477 case UP: /*point of insertion is above e*/
478 /*1-distant average of nodes below e to
480 A[e->head->index][e->head->index] =
481 (e->topsize*A[e->head->index][e->head->index] +
482 A[e->head->index][v->index])/(e->topsize + 1);
483 /*2-distant average of nodes below e to
484 nodes above parent of e*/
485 A[e->head->index][par->head->index] =
486 A[par->head->index][e->head->index] =
487 (par->topsize*A[par->head->index][e->head->index]
488 + A[e->head->index][v->index]) / (par->topsize + 1);
489 /*must do both 3-distant averages involving par*/
492 updateSubTreeAverages(A, left, v, UP); /*and recursive call*/
493 /*3-distant average*/
494 A[par->head->index][left->head->index]
495 = A[left->head->index][par->head->index]
496 = (par->topsize*A[par->head->index][left->head->index]
497 + A[left->head->index][v->index]) / (par->topsize + 1);
501 updateSubTreeAverages(A, right, v, UP);
502 A[par->head->index][right->head->index]
503 = A[right->head->index][par->head->index]
504 = (par->topsize*A[par->head->index][right->head->index]
505 + A[right->head->index][v->index]) / (par->topsize + 1);
508 case SKEW: /*point of insertion is skew to e*/
509 /*1-distant average of nodes below e to
511 A[e->head->index][e->head->index] =
512 (e->topsize*A[e->head->index][e->head->index] +
513 A[e->head->index][v->index])/(e->topsize + 1);
514 /*no 2-distant averages to update in this case*/
515 /*updating 3-distant averages involving sib*/
518 updateSubTreeAverages(A, left, v, UP);
519 A[sib->head->index][left->head->index]
520 = A[left->head->index][sib->head->index]
521 = (sib->bottomsize*A[sib->head->index][left->head->index]
522 + A[left->head->index][v->index]) / (sib->bottomsize + 1);
526 updateSubTreeAverages(A, right, v, UP);
527 A[sib->head->index][right->head->index]
528 = A[right->head->index][sib->head->index]
529 = (sib->bottomsize*A[par->head->index][right->head->index]
530 + A[right->head->index][v->index]) / (sib->bottomsize + 1);
535 case LEFT: /*point of insertion is below the edge left*/
536 /*1-distant average*/
537 A[e->head->index][e->head->index] =
538 (e->bottomsize*A[e->head->index][e->head->index] +
539 A[v->index][e->head->index])/(e->bottomsize + 1);
540 /*2-distant averages*/
541 A[e->head->index][e->tail->index] =
542 A[e->tail->index][e->head->index] =
543 (e->bottomsize*A[e->head->index][e->tail->index] +
544 A[v->index][e->tail->index])/(e->bottomsize + 1);
545 A[left->head->index][right->head->index] =
546 A[right->head->index][left->head->index] =
547 (left->bottomsize*A[right->head->index][left->head->index]
548 + A[right->head->index][v->index]) / (left->bottomsize+1);
549 /*3-distant avereages involving left*/
552 updateSubTreeAverages(A, sib, v, SKEW);
553 A[left->head->index][sib->head->index]
554 = A[sib->head->index][left->head->index]
555 = (left->bottomsize*A[left->head->index][sib->head->index]
556 + A[sib->head->index][v->index]) / (left->bottomsize + 1);
560 if (e->tail->leftEdge == e)
561 updateSubTreeAverages(A, par, v, LEFT);
563 updateSubTreeAverages(A, par, v, RIGHT);
564 A[left->head->index][par->head->index]
565 = A[par->head->index][left->head->index]
566 = (left->bottomsize*A[left->head->index][par->head->index]
567 + A[v->index][par->head->index]) / (left->bottomsize + 1);
570 case RIGHT: /*point of insertion is below the edge right*/
571 /*1-distant average*/
572 A[e->head->index][e->head->index] =
573 (e->bottomsize*A[e->head->index][e->head->index] +
574 A[v->index][e->head->index])/(e->bottomsize + 1);
575 /*2-distant averages*/
576 A[e->head->index][e->tail->index] =
577 A[e->tail->index][e->head->index] =
578 (e->bottomsize*A[e->head->index][e->tail->index] +
579 A[v->index][e->tail->index])/(e->bottomsize + 1);
580 A[left->head->index][right->head->index] =
581 A[right->head->index][left->head->index] =
582 (right->bottomsize*A[right->head->index][left->head->index]
583 + A[left->head->index][v->index]) / (right->bottomsize+1);
584 /*3-distant avereages involving right*/
587 updateSubTreeAverages(A, sib, v, SKEW);
588 A[right->head->index][sib->head->index]
589 = A[sib->head->index][right->head->index]
590 = (right->bottomsize*A[right->head->index][sib->head->index]
591 + A[sib->head->index][v->index]) / (right->bottomsize + 1);
595 if (e->tail->leftEdge == e)
596 updateSubTreeAverages(A, par, v, LEFT);
598 updateSubTreeAverages(A, par, v, RIGHT);
599 A[right->head->index][par->head->index]
600 = A[par->head->index][right->head->index]
601 = (right->bottomsize*A[right->head->index][par->head->index]
602 + A[v->index][par->head->index]) / (right->bottomsize + 1);
609 void assignBottomsize(edge *e)
615 assignBottomsize(e->head->leftEdge);
616 assignBottomsize(e->head->rightEdge);
617 e->bottomsize = e->head->leftEdge->bottomsize
618 + e->head->rightEdge->bottomsize;
622 void assignTopsize(edge *e, int numLeaves)
626 e->topsize = numLeaves - e->bottomsize;
627 assignTopsize(e->head->leftEdge,numLeaves);
628 assignTopsize(e->head->rightEdge,numLeaves);
632 void assignAllSizeFields(tree *T)
634 assignBottomsize(T->root->leftEdge);
635 assignTopsize(T->root->leftEdge,T->size/2 + 1);