X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2Fme_ols.c;h=3c002c9262f9f07c15e89bc3148397b07c89bf69;hb=52008daf7f708f3fcdc735f22af308dd1a461670;hp=d4a8fcbc2e4b551630674be7287854b209cbe01b;hpb=c827059eeafc8cbe41c812b26979543ab287803e;p=ape.git diff --git a/src/me_ols.c b/src/me_ols.c index d4a8fcb..3c002c9 100644 --- a/src/me_ols.c +++ b/src/me_ols.c @@ -1,6 +1,3 @@ -//#include -//#include -//#include #include "me.h" /*from NNI.c*/ @@ -15,7 +12,7 @@ void OLSext(edge *e, double **A) if(leaf(e->head)) { f = siblingEdge(e); - e->distance = 0.5*(A[e->head->index][e->tail->index] + e->distance = 0.5*(A[e->head->index][e->tail->index] + A[e->head->index][f->head->index] - A[f->head->index][e->tail->index]); } @@ -29,12 +26,12 @@ void OLSext(edge *e, double **A) } } -double wf(double lambda, double D_LR, double D_LU, double D_LD, +double wf(double lambda, double D_LR, double D_LU, double D_LD, double D_RU, double D_RD, double D_DU) { double weight; weight = 0.5*(lambda*(D_LU + D_RD) + (1 -lambda)*(D_LD + D_RU) - - (D_LR + D_DU)); + - (D_LR + D_DU)); return(weight); } @@ -45,7 +42,7 @@ void OLSint(edge *e, double **A) left = e->head->leftEdge; right = e->head->rightEdge; sib = siblingEdge(e); - lambda = ((double) sib->bottomsize*left->bottomsize + + lambda = ((double) sib->bottomsize*left->bottomsize + right->bottomsize*e->tail->parentEdge->topsize) / (e->bottomsize*e->topsize); e->distance = wf(lambda,A[left->head->index][right->head->index], @@ -88,7 +85,7 @@ void makeOLSAveragesTable(tree *T, double **D, double **A) if(leaf(e->head)) while (NULL != f) { - if (exclude != f) + if (exclude != f) { if (leaf(f->head)) A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = D[e->head->index2][f->head->index2]; @@ -96,19 +93,19 @@ void makeOLSAveragesTable(tree *T, double **D, double **A) { g = f->head->leftEdge; h = f->head->rightEdge; - A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize; + A[e->head->index][f->head->index] = A[f->head->index][e->head->index] = (g->bottomsize*A[e->head->index][g->head->index] + h->bottomsize*A[e->head->index][h->head->index])/f->bottomsize; } } else /*exclude == f*/ - exclude = exclude->tail->parentEdge; + exclude = exclude->tail->parentEdge; f = depthFirstTraverse(T,f); } - else + else /*e->head is not a leaf, so we go recursively to values calculated for the nodes below e*/ while(NULL !=f ) { - if (exclude != f) + if (exclude != f) { g = e->head->leftEdge; h = e->head->rightEdge; @@ -126,9 +123,9 @@ void makeOLSAveragesTable(tree *T, double **D, double **A) root*/ f = e->tail->parentEdge; if (NULL != f) - fillTableUp(e,f,A,D,T); + fillTableUp(e,f,A,D,T); e = depthFirstTraverse(T,e); - } + } /*we are indexing this table by vertex indices, but really the underlying object is the edge set. Thus, the array is one element @@ -144,14 +141,14 @@ void GMEcalcDownAverage(node *v, edge *e, double **D, double **A) { edge *left, *right; if (leaf(e->head)) - A[e->head->index][v->index] = D[v->index2][e->head->index2]; + A[e->head->index][v->index] = D[v->index2][e->head->index2]; else { left = e->head->leftEdge; right = e->head->rightEdge; - A[e->head->index][v->index] = - ( left->bottomsize * A[left->head->index][v->index] + - right->bottomsize * A[right->head->index][v->index]) + A[e->head->index][v->index] = + ( left->bottomsize * A[left->head->index][v->index] + + right->bottomsize * A[right->head->index][v->index]) / e->bottomsize; } } @@ -165,8 +162,8 @@ void GMEcalcUpAverage(node *v, edge *e, double **D, double **A) { up = e->tail->parentEdge; down = siblingEdge(e); - A[v->index][e->head->index] = - (up->topsize * A[v->index][up->head->index] + + A[v->index][e->head->index] = + (up->topsize * A[v->index][up->head->index] + down->bottomsize * A[down->head->index][v->index]) / e->topsize; } @@ -187,8 +184,8 @@ void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A) GMEcalcDownAverage(v,e,D,A); e = depthFirstTraverse(T,e); } - - e = topFirstTraverse(T,e); /*the upward averages need to be calculated + + e = topFirstTraverse(T,e); /*the upward averages need to be calculated from top to bottom */ while(NULL != e) { @@ -197,7 +194,7 @@ void GMEcalcNewvAverages(tree *T, node *v, double **D, double **A) } } -double wf4(double lambda, double lambda2, double D_AB, double D_AC, +double wf4(double lambda, double lambda2, double D_AB, double D_AC, double D_BC, double D_Av, double D_Bv, double D_Cv) { return(((1 - lambda) * (D_AC + D_Bv) + (lambda2 - 1)*(D_AB + D_Cv) @@ -206,10 +203,10 @@ double wf4(double lambda, double lambda2, double D_AB, double D_AC, /*testEdge cacluates what the OLS weight would be if v were inserted into - T along e. Compare against known values for inserting along + T along e. Compare against known values for inserting along f = e->parentEdge */ /*edges are tested by a top-first, left-first scheme. we presume - all distances are fixed to the correct weight for + all distances are fixed to the correct weight for e->parentEdge, if e is a left-oriented edge*/ void testEdge(edge *e, node *v, double **A) { @@ -223,12 +220,12 @@ void testEdge(edge *e, node *v, double **A) / ((1 + par->topsize)*(par->bottomsize))); lambda2 = (((double) (sib->bottomsize + e->bottomsize*par->topsize)) / ((1 + e->bottomsize)*(e->topsize))); - e->totalweight = par->totalweight + e->totalweight = par->totalweight + wf4(lambda,lambda2,A[e->head->index][sib->head->index], A[sib->head->index][e->tail->index], A[e->head->index][e->tail->index], A[sib->head->index][v->index],A[e->head->index][v->index], - A[v->index][e->tail->index]); + A[v->index][e->tail->index]); } void printDoubleTable(double **A, int d) @@ -244,7 +241,7 @@ void printDoubleTable(double **A, int d) void GMEsplitEdge(tree *T, node *v, edge *e, double **A); -tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) +tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) /*the key function of the program addSpeices inserts the node v to the tree T. It uses testEdge to see what the weight would be if v split a particular edge. Weights are assigned by @@ -257,18 +254,18 @@ tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) /* if (verbose) printf("Adding %s.\n",v->label);*/ - + /*initialize variables as necessary*/ /*CASE 1: T is empty, v is the first node*/ if (NULL == T) /*create a tree with v as only vertex, no edges*/ { T_e = newTree(); - T_e->root = v; - /*note that we are rooting T arbitrarily at a leaf. + T_e->root = v; + /*note that we are rooting T arbitrarily at a leaf. T->root is not the phylogenetic root*/ v->index = 0; T_e->size = 1; - return(T_e); + return(T_e); } /*CASE 2: T is a single-vertex tree*/ if (1 == T->size) @@ -282,11 +279,11 @@ tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) A[v->index][v->index] = D[v->index2][T->root->index2]; T->root->leftEdge = v->parentEdge = e; T->size = 2; - return(T); + return(T); } /*CASE 3: T has at least two nodes and an edge. Insert new node by breaking one of the edges*/ - + v->index = T->size; /*if (!(T->size % 100)) printf("T->size is %d\n",T->size);*/ @@ -294,12 +291,12 @@ tree *GMEaddSpecies(tree *T,node *v, double **D, double **A) /*calcNewvAverges will assign values to all the edge averages of T which include the node v. Will do so using pre-existing averages in T and information from A,D*/ - e_min = T->root->leftEdge; - e = e_min->head->leftEdge; + e_min = T->root->leftEdge; + e = e_min->head->leftEdge; while (NULL != e) { - testEdge(e,v,A); - /*testEdge tests weight of tree if loop variable + testEdge(e,v,A); + /*testEdge tests weight of tree if loop variable e is the edge split, places this weight in e->totalweight field */ if (e->totalweight < w_min) { @@ -329,23 +326,23 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) /*we need to update the matrix A so all 1-distant, 2-distant, and 3-distant averages are correct*/ - + /*first, initialize the newNode entries*/ /*1-distant*/ - A[newNode->index][newNode->index] = + A[newNode->index][newNode->index] = (e->bottomsize*A[e->head->index][e->head->index] + A[v->index][e->head->index]) / (e->bottomsize + 1); /*1-distant for v*/ - A[v->index][v->index] = - (e->bottomsize*A[e->head->index][v->index] + A[v->index][v->index] = + (e->bottomsize*A[e->head->index][v->index] + e->topsize*A[v->index][e->head->index]) / (e->bottomsize + e->topsize); /*2-distant for v,newNode*/ - A[v->index][newNode->index] = A[newNode->index][v->index] = + A[v->index][newNode->index] = A[newNode->index][v->index] = A[v->index][e->head->index]; - + /*second 2-distant for newNode*/ A[newNode->index][e->tail->index] = A[e->tail->index][newNode->index] = (e->bottomsize*A[e->head->index][e->tail->index] @@ -353,12 +350,12 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) /*third 2-distant for newNode*/ A[newNode->index][e->head->index] = A[e->head->index][newNode->index] = A[e->head->index][e->head->index]; - + if (NULL != sib) { /*fourth and last 2-distant for newNode*/ A[newNode->index][sib->head->index] = - A[sib->head->index][newNode->index] = + A[sib->head->index][newNode->index] = (e->bottomsize*A[sib->head->index][e->head->index] + A[sib->head->index][v->index]) / (e->bottomsize + 1); updateSubTreeAverages(A,sib,v,SKEW); /*updates sib and below*/ @@ -373,17 +370,17 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) if (NULL != left) updateSubTreeAverages(A,left,v,UP); /*updates left and below*/ if (NULL != right) - updateSubTreeAverages(A,right,v,UP); /*updates right and below*/ + updateSubTreeAverages(A,right,v,UP); /*updates right and below*/ /*1-dist for e->head*/ - A[e->head->index][e->head->index] = + A[e->head->index][e->head->index] = (e->topsize*A[e->head->index][e->head->index] + A[e->head->index][v->index]) / (e->topsize+1); /*2-dist for e->head (v,newNode,left,right) taken care of elsewhere*/ /*3-dist with e->head either taken care of elsewhere (below) or unchanged (sib,e->tail)*/ - + /*symmetrize the matrix (at least for distant-2 subtrees) */ A[v->index][e->head->index] = A[e->head->index][v->index]; /*and distant-3 subtrees*/ @@ -395,8 +392,8 @@ void GMEupdateAveragesMatrix(double **A, edge *e, node *v, node *newNode) if (NULL != sib) A[v->index][sib->head->index] = A[sib->head->index][v->index]; -} - +} + void GMEsplitEdge(tree *T, node *v, edge *e, double **A) { char nodelabel[NODE_LABEL_LENGTH]; @@ -404,34 +401,34 @@ void GMEsplitEdge(tree *T, node *v, edge *e, double **A) edge *newPendantEdge; edge *newInternalEdge; node *newNode; - + snprintf(nodelabel,1,""); - newNode = makeNewNode(nodelabel,T->size + 1); - + newNode = makeNewNode(nodelabel,T->size + 1); + //sprintf(edgelabel,"E%d",T->size); snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size); - newPendantEdge = makeEdge(edgelabel,newNode,v,0.0); - + newPendantEdge = makeEdge(edgelabel,newNode,v,0.0); + //sprintf(edgelabel,"E%d",T->size+1); snprintf(edgelabel,EDGE_LABEL_LENGTH,"E%d",T->size+1); - newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0); - + newInternalEdge = makeEdge(edgelabel,newNode,e->head,0.0); + /* if (verbose) printf("Inserting node %s on edge %s between nodes %s and %s.\n", v->label,e->label,e->tail->label,e->head->label);*/ /*update the matrix of average distances*/ /*also updates the bottomsize, topsize fields*/ - + GMEupdateAveragesMatrix(A,e,v,newNode); newNode->parentEdge = e; e->head->parentEdge = newInternalEdge; v->parentEdge = newPendantEdge; e->head = newNode; - + T->size = T->size + 2; - if (e->tail->leftEdge == e) + if (e->tail->leftEdge == e) { newNode->leftEdge = newInternalEdge; newNode->rightEdge = newPendantEdge; @@ -441,16 +438,16 @@ void GMEsplitEdge(tree *T, node *v, edge *e, double **A) newNode->leftEdge = newInternalEdge; newNode->rightEdge = newPendantEdge; } - + /*assign proper topsize, bottomsize values to the two new Edges*/ - - newPendantEdge->bottomsize = 1; + + newPendantEdge->bottomsize = 1; newPendantEdge->topsize = e->bottomsize + e->topsize; - + newInternalEdge->bottomsize = e->bottomsize; newInternalEdge->topsize = e->topsize; /*off by one, but we adjust that below*/ - + /*and increment these fields for all other edges*/ updateSizes(newInternalEdge,UP); updateSizes(e,DOWN); @@ -466,8 +463,8 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) par = e->tail->parentEdge; switch(direction) { - /*want to preserve correctness of - all 1-distant, 2-distant, and 3-distant averages*/ + /*want to preserve correctness of + all 1-distant, 2-distant, and 3-distant averages*/ /*1-distant updated at edge splitting the two trees*/ /*2-distant updated: (left->head,right->head) and (head,tail) updated at @@ -475,15 +472,15 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) (That would lead to multiple updating).*/ /*3-distant updated: at edge in center of quartet*/ case UP: /*point of insertion is above e*/ - /*1-distant average of nodes below e to + /*1-distant average of nodes below e to nodes above e*/ - A[e->head->index][e->head->index] = - (e->topsize*A[e->head->index][e->head->index] + - A[e->head->index][v->index])/(e->topsize + 1); - /*2-distant average of nodes below e to + A[e->head->index][e->head->index] = + (e->topsize*A[e->head->index][e->head->index] + + A[e->head->index][v->index])/(e->topsize + 1); + /*2-distant average of nodes below e to nodes above parent of e*/ - A[e->head->index][par->head->index] = - A[par->head->index][e->head->index] = + A[e->head->index][par->head->index] = + A[par->head->index][e->head->index] = (par->topsize*A[par->head->index][e->head->index] + A[e->head->index][v->index]) / (par->topsize + 1); /*must do both 3-distant averages involving par*/ @@ -506,11 +503,11 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) } break; case SKEW: /*point of insertion is skew to e*/ - /*1-distant average of nodes below e to + /*1-distant average of nodes below e to nodes above e*/ - A[e->head->index][e->head->index] = - (e->topsize*A[e->head->index][e->head->index] + - A[e->head->index][v->index])/(e->topsize + 1); + A[e->head->index][e->head->index] = + (e->topsize*A[e->head->index][e->head->index] + + A[e->head->index][v->index])/(e->topsize + 1); /*no 2-distant averages to update in this case*/ /*updating 3-distant averages involving sib*/ if (NULL != left) @@ -534,16 +531,16 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) case LEFT: /*point of insertion is below the edge left*/ /*1-distant average*/ - A[e->head->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->head->index] + - A[v->index][e->head->index])/(e->bottomsize + 1); + A[e->head->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->head->index] + + A[v->index][e->head->index])/(e->bottomsize + 1); /*2-distant averages*/ - A[e->head->index][e->tail->index] = - A[e->tail->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->tail->index] + - A[v->index][e->tail->index])/(e->bottomsize + 1); - A[left->head->index][right->head->index] = - A[right->head->index][left->head->index] = + A[e->head->index][e->tail->index] = + A[e->tail->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->tail->index] + + A[v->index][e->tail->index])/(e->bottomsize + 1); + A[left->head->index][right->head->index] = + A[right->head->index][left->head->index] = (left->bottomsize*A[right->head->index][left->head->index] + A[right->head->index][v->index]) / (left->bottomsize+1); /*3-distant avereages involving left*/ @@ -566,19 +563,19 @@ void updateSubTreeAverages(double **A, edge *e, node *v, int direction) = (left->bottomsize*A[left->head->index][par->head->index] + A[v->index][par->head->index]) / (left->bottomsize + 1); } - break; + break; case RIGHT: /*point of insertion is below the edge right*/ /*1-distant average*/ - A[e->head->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->head->index] + - A[v->index][e->head->index])/(e->bottomsize + 1); + A[e->head->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->head->index] + + A[v->index][e->head->index])/(e->bottomsize + 1); /*2-distant averages*/ - A[e->head->index][e->tail->index] = - A[e->tail->index][e->head->index] = - (e->bottomsize*A[e->head->index][e->tail->index] + - A[v->index][e->tail->index])/(e->bottomsize + 1); - A[left->head->index][right->head->index] = - A[right->head->index][left->head->index] = + A[e->head->index][e->tail->index] = + A[e->tail->index][e->head->index] = + (e->bottomsize*A[e->head->index][e->tail->index] + + A[v->index][e->tail->index])/(e->bottomsize + 1); + A[left->head->index][right->head->index] = + A[right->head->index][left->head->index] = (right->bottomsize*A[right->head->index][left->head->index] + A[left->head->index][v->index]) / (right->bottomsize+1); /*3-distant avereages involving right*/