5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 /*****************************************************************/
// NOTE(review): the function header is not visible in this listing; the errorOut call
// at the end reports ("Tree", "Tree"), so this is presumably the body of the Tree constructor.
// Builds the node vector from the sequence names stored in GlobalData: leaves occupy
// indexes 0..numLeaves-1 and the remaining slots (numLeaves..numNodes-1) are non-leaf nodes.
16 globaldata = GlobalData::getInstance();
// parse the tree file only while runParse is set, then clear the flag
18 if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; }
19 //for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
20 numLeaves = globaldata->Treenames.size();
// the total node count is derived from the leaf count
21 numNodes = 2*numLeaves - 1;
23 tree.resize(numNodes);
25 //initialize tree with correct number of nodes, name and group info.
26 for (int i = 0; i < numNodes; i++) {
27 //initialize leaf nodes
28 if (i <= (numLeaves-1)) {
// leaf i takes the i-th parsed name and the group the TreeMap reports for that name
29 tree[i].setName(globaldata->Treenames[i]);
30 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
31 //set pcount and pGroup for groupname to 1.
32 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
33 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
34 //Treemap knows name, group and index to speed up search
35 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
37 //initialize non leaf nodes
38 }else if (i > (numLeaves-1)) {
// NOTE(review): the body of the non-leaf branch is not visible in this listing.
45 errorOut(e, "Tree", "Tree");
50 /*****************************************************************/
52 /*****************************************************************/
// Looks up searchName in the TreeMap held by globaldata and stores the result in `index`.
// NOTE(review): the return statement is not visible in this listing; presumably `index` is returned.
53 int Tree::getIndex(string searchName) {
55 //Treemap knows name, group and index to speed up search
56 // getIndex function will return the vector index or -1 if seq is not found.
57 int index = globaldata->gTreemap->getIndex(searchName);
// failures are reported through errorOut with the class and function name
62 errorOut(e, "Tree", "getIndex");
66 /*****************************************************************/
// Records `index` as the position of `searchName` by forwarding both arguments to the
// TreeMap held by globaldata. No node in `tree` is touched by the visible lines.
68 void Tree::setIndex(string searchName, int index) {
70 //set index in treemap
71 globaldata->gTreemap->setIndex(searchName, index);
// failures are reported through errorOut with the class and function name
74 errorOut(e, "Tree", "setIndex");
78 /*****************************************************************/
// Recomputes pGroups and pcount for every non-leaf node (indexes numLeaves..numNodes-1)
// from the maps of its children.
// NOTE(review): mergeGroups/mergeGcounts read the children's maps, so this ascending loop
// presumably relies on every non-leaf node having a higher index than its children - confirm.
79 void Tree::assembleTree() {
81 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
82 for (int i = numLeaves; i < numNodes; i++) {
83 tree[i].pGroups = (mergeGroups(i));
84 tree[i].pcount = (mergeGcounts(i));
88 errorOut(e, "Tree", "assembleTree");
92 /*****************************************************************/
// Copies node data FROM the tree pointed to by `copy` INTO this tree, node by node:
// name, group, branch length, parent, children, index, pGroups and pcount.
// NOTE(review): the loop bound is this tree's numNodes; it assumes copy->tree holds at
// least that many nodes - confirm against callers.
93 void Tree::getCopy(Tree* copy) {
96 //for each node in the tree copy its info
97 for (int i = 0; i < numNodes; i++) {
99 tree[i].setName(copy->tree[i].getName());
102 tree[i].setGroup(copy->tree[i].getGroup());
105 tree[i].setBranchLength(copy->tree[i].getBranchLength());
108 tree[i].setParent(copy->tree[i].getParent());
111 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
113 //copy index in node and tmap
114 tree[i].setIndex(copy->tree[i].getIndex());
// NOTE(review): Tree::getIndex and Tree::setIndex in this file both go through
// globaldata->gTreemap, so this writes back the value it just read for the same name;
// it looks like a no-op unless the lookup has side effects - confirm intent.
115 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
118 tree[i].pGroups = copy->tree[i].pGroups;
121 tree[i].pcount = copy->tree[i].pcount;
124 catch(exception& e) {
125 errorOut(e, "Tree", "getCopy");
129 /*****************************************************************/
130 //returns a map with a groupname and the number of times that group was seen in the children
131 //for instance if your children are white and black then it would return a map with 2 entries
132 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
133 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Parameter i is the index of the node whose two children are merged; the children's own
// pGroups maps are only read here, the merge happens in a local copy.
// NOTE(review): the declarations of it, it2 and maxPars, the else keyword, the erase
// condition and the return statement are not visible in this listing.
135 map<string, int> Tree::mergeGroups(int i) {
137 int lc = tree[i].getLChild();
138 int rc = tree[i].getRChild();
140 //set parsimony groups to left child
141 map<string,int> parsimony = tree[lc].pGroups;
145 //look at right child groups and update maxPars if right child has something higher for that group.
146 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
147 it2 = parsimony.find(it->first);
// group already present from the left child: bump its count
148 if (it2 != parsimony.end()) {
149 parsimony[it->first]++;
// presumably the else branch: group seen only in the right child starts at 1
151 parsimony[it->first] = 1;
// remember the largest count seen so far
154 if(parsimony[it->first] > maxPars){
155 maxPars = parsimony[it->first];
159 // this is true if right child had a greater parsimony for a certain group
161 //erase all the groups that are only 1 because you found something with 2.
// no increment in the for header: the iterator is advanced inside the body
162 for(it=parsimony.begin();it!=parsimony.end();){
// post-increment hands erase the current position while `it` already points at the next entry
164 parsimony.erase(it++);
167 //set one remaining groups to 1
168 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
169 for(it=parsimony.begin();it!=parsimony.end();it++){
170 parsimony[it->first] = 1;
177 catch(exception& e) {
178 errorOut(e, "Tree", "mergeGroups");
182 /*****************************************************************/
183 //returns a map with a groupname and the number of times that group was seen in the children
184 //for instance if your children are white and black then it would return a map with 2 entries
185 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
186 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Same merge as mergeGroups, restricted to the groups listed in g: entries for which
// inUsersGroups(group, g) is not true are first erased from both children's pGroups.
// Note that the erase calls act on tree[lc].pGroups and tree[rc].pGroups themselves,
// so the children are modified in place, not just the returned map.
// NOTE(review): the iterator/maxPars declarations, else branches, iterator increments,
// the erase condition of the third loop and the return statement are not visible in this listing.
188 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
191 int lc = tree[i].getLChild();
192 int rc = tree[i].getRChild();
194 //loop through nodes groups removing the ones the user doesn't want
195 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
196 if (inUsersGroups(it->first, g) != true) {
// post-increment keeps `it` valid across the erase
197 tree[lc].pGroups.erase(it++);
201 //loop through nodes groups removing the ones the user doesn't want
202 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
203 if (inUsersGroups(it->first, g) != true) {
204 tree[rc].pGroups.erase(it++);
208 //set parsimony groups to left child
209 map<string,int> parsimony = tree[lc].pGroups;
213 //look at right child groups and update maxPars if right child has something higher for that group.
214 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
215 it2 = parsimony.find(it->first);
// group already present from the left child: bump its count
216 if (it2 != parsimony.end()) {
217 parsimony[it->first]++;
// presumably the else branch: group seen only in the right child starts at 1
219 parsimony[it->first] = 1;
222 if(parsimony[it->first] > maxPars){
223 maxPars = parsimony[it->first];
227 // this is true if right child had a greater parsimony for a certain group
229 //erase all the groups that are only 1 because you found something with 2.
230 for(it=parsimony.begin();it!=parsimony.end();){
232 parsimony.erase(it++);
// every group that survives the erase pass is reset to 1
236 for(it=parsimony.begin();it!=parsimony.end();it++){
237 parsimony[it->first] = 1;
243 catch(exception& e) {
244 errorOut(e, "Tree", "mergeUserGroups");
250 /**************************************************************************************************/
// Adds up the per-group counts (pcount) of the two children of the node at `position`.
// NOTE(review): the declaration of `it` and the return statement are not visible in this
// listing; presumably `sum` is returned.
252 map<string,int> Tree::mergeGcounts(int position) {
// NOTE(review): `pos` is not used in any visible line
254 map<string,int>::iterator pos;
256 int lc = tree[position].getLChild();
257 int rc = tree[position].getRChild();
// start from a copy of the left child's counts
259 map<string,int> sum = tree[lc].pcount;
// operator[] creates a zero entry for groups that only the right child has, then adds to it
261 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
262 sum[it->first] += it->second;
266 catch(exception& e) {
267 errorOut(e, "Tree", "mergeGcounts");
271 /**************************************************************************************************/
// Randomly exchanges the labels (pGroups, group, name, pcount) of leaf nodes, but only
// between pairs of leaves whose groups are both accepted by inUsersGroups(group, g).
// NOTE(review): the declarations of z, treez and treei are not visible in this listing.
// NOTE(review): no visible line updates the TreeMap index for the swapped names - confirm
// whether getIndex results are expected to stay in sync after this call.
273 void Tree::randomLabels(vector<string> g) {
276 for(int i = 0; i < numLeaves; i++){
278 //get random index to switch with
// rand()/(RAND_MAX+1.0) is below 1, so the scaled value is intended to fall in [0, i]
279 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
281 //you only want to randomize the nodes that are from a group the user wants analyzed, so
282 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
285 treez = inUsersGroups(tree[z].getGroup(), g);
286 treei = inUsersGroups(tree[i].getGroup(), g);
288 if ((treez == true) && (treei == true)) {
289 //switches node i and node z's info.
// each swap below goes through a temporary holding node z's value
290 map<string,int> lib_hold = tree[z].pGroups;
291 tree[z].pGroups = (tree[i].pGroups);
292 tree[i].pGroups = (lib_hold);
294 string zgroup = tree[z].getGroup();
295 tree[z].setGroup(tree[i].getGroup());
296 tree[i].setGroup(zgroup);
298 string zname = tree[z].getName();
299 tree[z].setName(tree[i].getName());
300 tree[i].setName(zname);
302 map<string,int> gcount_hold = tree[z].pcount;
303 tree[z].pcount = (tree[i].pcount);
304 tree[i].pcount = (gcount_hold);
308 catch(exception& e) {
309 errorOut(e, "Tree", "randomLabels");
313 /**************************************************************************************************/
// Reassigns group labels at random among the leaves that currently belong to groupA or
// groupB. A pool holding numSeqsA copies of groupA followed by numSeqsB copies of groupB
// is shuffled, and each matching leaf takes the pool entry at randomCounter; the leaf's
// pcount and pGroups are reset to contain only the new group with value 1.
// NOTE(review): the statement advancing randomCounter is not visible in this listing.
315 void Tree::randomLabels(string groupA, string groupB) {
317 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
318 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
// the whole pool is filled with groupA first; the tail is overwritten with groupB below
320 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
// NOTE(review): int i is compared against the unsigned size() here
321 for(int i=numSeqsA;i<randomGroups.size();i++){
322 randomGroups[i] = groupB;
324 random_shuffle(randomGroups.begin(), randomGroups.end());
326 int randomCounter = 0;
327 for(int i=0;i<numLeaves;i++){
// leaves from any other group are left untouched
328 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
329 tree[i].setGroup(randomGroups[randomCounter]);
330 tree[i].pcount.clear();
331 tree[i].pcount[randomGroups[randomCounter]] = 1;
332 tree[i].pGroups.clear();
333 tree[i].pGroups[randomGroups[randomCounter]] = 1;
338 catch(exception& e) {
339 errorOut(e, "Tree", "randomLabels");
343 /**************************************************************************************************/
// Shuffles the branch lengths among all nodes: walking i from the last node down to 0,
// the branch length of node i is exchanged with that of a randomly chosen node z.
344 void Tree::randomBlengths() {
346 for(int i=numNodes-1;i>=0;i--){
// rand()/(RAND_MAX+1.0) is below 1, so z is intended to fall in [0, i]
347 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
// swap through a temporary holding node z's length
349 float bl_hold = tree[z].getBranchLength();
350 tree[z].setBranchLength(tree[i].getBranchLength());
351 tree[i].setBranchLength(bl_hold);
354 catch(exception& e) {
355 errorOut(e, "Tree", "randomBlengths");
359 /*************************************************************************************************/
// Overload taking the list of groups g.
// NOTE(review): the body is not visible in this listing; by analogy with the two-group
// overload it presumably calls randomLabels(g) - confirm against the full file.
360 void Tree::assembleRandomUnifracTree(vector<string> g) {
364 /*************************************************************************************************/
// Overload for a pair of groups: starts by randomly reassigning the groupA/groupB labels
// among the leaves via randomLabels(groupA, groupB).
// NOTE(review): any statements after the randomLabels call are not visible in this listing.
365 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
366 randomLabels(groupA, groupB);
370 /*************************************************************************************************/
371 //for now it's just random topology but may become random labels as well later that's why this is such a simple function now...
// NOTE(review): the body is not visible in this listing.
372 void Tree::assembleRandomTree() {
376 /**************************************************************************************************/
// Rebuilds the parent/child links at random. All parents are reset to -1 and the children
// of every non-leaf node to (-1, -1); then each non-leaf node i is given two distinct
// children picked at random among nodes whose parent is still -1.
// NOTE(review): the retry loops around the two random draws and the declaration of
// `escape` are not visible in this listing.
378 void Tree::randomTopology() {
380 for(int i=0;i<numNodes;i++){
381 tree[i].setParent(-1);
383 for(int i=numLeaves;i<numNodes;i++){
384 tree[i].setChildren(-1, -1);
387 for(int i=numLeaves;i<numNodes;i++){
389 int rnd_index1, rnd_index2;
// NOTE(review): rand()/RAND_MAX can equal 1.0, in which case the scaled index is i itself;
// node i has parent -1 at this point, so it could be accepted as its own child - confirm
// whether dividing by RAND_MAX+1.0 (as randomBlengths does) was intended.
391 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
// a candidate is accepted only if it has not been attached to a parent yet
392 if(tree[rnd_index1].getParent() == -1){escape = 1;}
397 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
// the second child must also be unattached and differ from the first
398 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
// link node i to the two chosen children; i itself stays parentless for now
403 tree[i].setChildren(rnd_index1,rnd_index2);
404 tree[i].setParent(-1);
405 tree[rnd_index1].setParent(i);
406 tree[rnd_index2].setParent(i);
409 catch(exception& e) {
410 errorOut(e, "Tree", "randomTopology");
414 /*****************************************************************/
// Writes the tree to `out` starting from the node returned by findRoot(), using
// printBranch in "branch" mode (branch lengths are emitted, see printBranch).
// NOTE(review): any output written after the printBranch call is not visible in this listing.
415 void Tree::print(ostream& out) {
417 int root = findRoot();
418 printBranch(root, out, "branch");
421 catch(exception& e) {
422 errorOut(e, "Tree", "print");
426 /*****************************************************************/
// Writes the tree to `out` starting from the node returned by findRoot(), using
// printBranch in "boot" mode (node labels are emitted instead of branch lengths, see printBranch).
// NOTE(review): any output written after the printBranch call is not visible in this listing.
427 void Tree::printForBoot(ostream& out) {
429 int root = findRoot();
430 printBranch(root, out, "boot");
433 catch(exception& e) {
434 errorOut(e, "Tree", "printForBoot");
439 /*****************************************************************/
440 // This prints out the tree in Newick form.
// Opens `filename` through openOutputFile and writes the tree from the root in "branch" mode.
// NOTE(review): the assignment of `filename` and the declaration of `out` are not visible
// in this listing; presumably `filename` is set from the parameter f - confirm.
441 void Tree::createNewickFile(string f) {
443 int root = findRoot();
444 //filename = getRootName(globaldata->getTreeFile()) + "newick";
447 openOutputFile(filename, out);
449 printBranch(root, out, "branch");
451 // you are at the end of the tree
// NOTE(review): the statements that terminate the tree and close the stream are not visible here
455 catch(exception& e) {
456 errorOut(e, "Tree", "createNewickFile");
461 /*****************************************************************/
462 //This function finds the index of the root node.
// Scans the nodes in index order and returns the first one whose parent is -1.
// NOTE(review): what is returned when no node has parent -1 is not visible in this listing.
464 int Tree::findRoot() {
466 for (int i = 0; i < numNodes; i++) {
468 if (tree[i].getParent() == -1) { return i; }
469 //cout << "i = " << i << endl;
470 //cout << "i's parent = " << tree[i].getParent() << endl;
474 catch(exception& e) {
475 errorOut(e, "Tree", "findRoot");
480 /*****************************************************************/
// Recursively writes the subtree rooted at `node` to `out`.
// mode "branch": a node's branch length is written as ":<length>" unless it is -1.
// mode "boot":   a node's label is written unless it is -1.
// A node is treated as non-leaf when its left child is not -1. Leaves are written using
// getGroup(), i.e. the group name rather than the sequence name.
// NOTE(review): the statements that emit the surrounding punctuation between the two
// recursive calls are not visible in this listing.
481 void Tree::printBranch(int node, ostream& out, string mode) {
484 // you are not a leaf
485 if (tree[node].getLChild() != -1) {
// left subtree first, then right subtree
487 printBranch(tree[node].getLChild(), out, mode);
489 printBranch(tree[node].getRChild(), out, mode);
491 if (mode == "branch") {
492 //if there is a branch length then print it
493 if (tree[node].getBranchLength() != -1) {
494 out << ":" << tree[node].getBranchLength();
496 }else if (mode == "boot") {
497 //if there is a label then print it
498 if (tree[node].getLabel() != -1) {
499 out << tree[node].getLabel();
502 }else { //you are a leaf
503 out << tree[node].getGroup();
// same suffix rules as for non-leaf nodes
504 if (mode == "branch") {
505 //if there is a branch length then print it
506 if (tree[node].getBranchLength() != -1) {
507 out << ":" << tree[node].getBranchLength();
509 }else if (mode == "boot") {
510 //if there is a label then print it
511 if (tree[node].getLabel() != -1) {
512 out << tree[node].getLabel();
518 catch(exception& e) {
519 errorOut(e, "Tree", "printBranch");
524 /*****************************************************************/
// Iterates over every node index from 0 to numNodes-1.
// NOTE(review): the loop body is not visible in this listing, so what is printed per node
// cannot be stated from here.
526 void Tree::printTree() {
528 for(int i=0;i<numNodes;i++){
535 /*****************************************************************/
536 //this code is a mess and should be rethought...-slw
// Fills globaldata->Treenames from the tree file named by globaldata->getTreeFile().
// The first character decides the format: anything other than '#' is handled as a plain
// tree string, '#' as a nexus file. Nexus files are scanned token by token until a
// translate block is found; a "tree" token met outside a comment is parsed directly.
// NOTE(review): many statements (declarations of filehandle, c, comment, done, holder,
// the comment-tracking logic, the read of `name`, closing braces) are not visible in this listing.
537 void Tree::parseTreeFile() {
539 //only takes names from the first tree and assumes that all trees use the same names.
541 string filename = globaldata->getTreeFile();
543 openInputFile(filename, filehandle);
548 //if you are not a nexus file
549 if((c = filehandle.peek()) != '#') {
// NOTE(review): the two nested loops below test the same condition; the statements that
// advance the stream inside them are not visible here
550 while((c = filehandle.peek()) != ';') {
551 while ((c = filehandle.peek()) != ';') {
// stop skipping once the opening parenthesis of the tree is reached outside a comment
559 if((c == '(') && (comment != 1)){ break; }
// readTreeString returning 0 ends the scan
563 done = readTreeString(filehandle);
564 if (done == 0) { break; }
566 //if you are a nexus file
567 }else if((c = filehandle.peek()) == '#') {
571 while(holder != "translate" && holder != "Translate"){
// "[" and "[!" tokens are tested here; presumably they open a nexus comment and drive the `comment` flag
572 if(holder == "[" || holder == "[!"){
578 filehandle >> holder;
580 //if there is no translate then you must read tree string otherwise use translate to get names
581 if((holder == "tree") && (comment != 1)){
582 //pass over the "tree rep.6878900 = "
// consumes characters until a '(' has been read or the next character is EOF
583 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
585 if(c == EOF) { break; }
586 filehandle.putback(c); //put back first ( of tree.
587 done = readTreeString(filehandle);
592 if (done == 0) { break; }
595 //use nexus translation rather than parsing tree to save time
596 if((holder == "translate") || (holder == "Translate")) {
598 string number, name, h;
599 h = ""; // so it enters the loop the first time
600 while((h != ";") && (number != ";")) {
601 filehandle >> number;
604 //c = , until done with translation then c = ;
// h receives the last character of name (the ',' or ';' separator)
// NOTE(review): if name is empty, name.length()-1 wraps around and substr throws
// std::out_of_range - confirm the input can never produce an empty token here.
605 h = name.substr(name.length()-1, name.length());
606 name.erase(name.end()-1); //erase the comma
// the translation number, not the name, is what gets stored in Treenames
607 globaldata->Treenames.push_back(number);
609 if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
614 catch(exception& e) {
615 errorOut(e, "Tree", "parseTreeFile");
619 /*******************************************************/
621 /*******************************************************/
// Reads one tree string from filehandle character by character until ';', appending each
// sequence name it finds to globaldata->Treenames; branch lengths and labels are skipped.
// Returns 0 when ';' or end of input (-1) is reached at the visible check points; the
// callers in parseTreeFile stop scanning when they get 0.
// NOTE(review): the declarations of c, name and k, the statements that build `name`, the
// condition guarding the label-skipping loop and the final return value are not visible
// in this listing.
622 int Tree::readTreeString(ifstream& filehandle) {
627 while((c = filehandle.peek()) != ';') {
629 //cout << " at beginning of while " << k << endl;
631 //to pass over labels in trees
// skip forward to the next ',', ':', ';' or end of input, then push that delimiter back
633 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
634 filehandle.putback(c);
636 if(c == ';') { return 0; }
637 if(c == -1) { return 0; }
// any character that is not a delimiter or whitespace starts a sequence name
639 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
641 c = filehandle.get();
// keep reading until a delimiter or whitespace ends the name
644 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
646 c = filehandle.get();
648 //cout << " in name while " << k << endl;
651 //cout << "name = " << name << endl;
652 globaldata->Treenames.push_back(name);
// the delimiter that ended the name is returned to the stream for the checks below
653 filehandle.putback(c);
655 //cout << " after putback" << k << endl;
658 if(c == ':') { //read until you reach the end of the branch length
659 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
660 c = filehandle.get();
662 //cout << " in branch while " << k << endl;
664 filehandle.putback(c);
667 c = filehandle.get();
669 //cout << " here after get " << k << endl;
670 if(c == ';') { return 0; }
// a closing parenthesis is pushed back so the next iteration sees it again
671 if(c == ')') { filehandle.putback(c); }
678 catch(exception& e) {
679 errorOut(e, "Tree", "readTreeString");
684 /*******************************************************/
686 /*******************************************************/