5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /*****************************************************************/
13 Tree::Tree(string g) {
15 globaldata = GlobalData::getInstance();
16 m = MothurOut::getInstance();
18 parseTreeFile(); globaldata->runParse = false;
21 m->errorOut(e, "Tree", "Tree - just parse");
25 /*****************************************************************/
28 globaldata = GlobalData::getInstance();
29 m = MothurOut::getInstance();
31 if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; }
32 //for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
33 numLeaves = globaldata->Treenames.size();
34 numNodes = 2*numLeaves - 1;
36 tree.resize(numNodes);
38 //initialize groupNodeInfo
39 for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
40 groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
43 //initialize tree with correct number of nodes, name and group info.
44 for (int i = 0; i < numNodes; i++) {
45 //initialize leaf nodes
46 if (i <= (numLeaves-1)) {
47 tree[i].setName(globaldata->Treenames[i]);
50 string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]);
51 vector<string> tempGroups; tempGroups.push_back(group);
52 tree[i].setGroup(tempGroups);
53 groupNodeInfo[group].push_back(i);
55 //set pcount and pGroup for groupname to 1.
56 tree[i].pcount[group] = 1;
57 tree[i].pGroups[group] = 1;
59 //Treemap knows name, group and index to speed up search
60 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
62 //intialize non leaf nodes
63 }else if (i > (numLeaves-1)) {
65 vector<string> tempGroups;
66 tree[i].setGroup(tempGroups);
71 m->errorOut(e, "Tree", "Tree");
76 /*****************************************************************/
78 /*****************************************************************/
79 void Tree::addNamesToCounts() {
81 //ex. seq1 seq2,seq3,se4
87 //before this function seq1.pcount = pasture -> 1
88 //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
90 //before this function seq1.pgroups = pasture -> 1
91 //after seq1.pgroups = pasture -> 1 since that is the dominant group
94 //go through each leaf and update its pcounts and pgroups
98 for (int i = 0; i < numLeaves; i++) {
100 string name = tree[i].getName();
102 map<string, string>::iterator itNames = globaldata->names.find(name);
104 if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); }
106 vector<string> dupNames;
107 m->splitAtComma(globaldata->names[name], dupNames);
109 map<string, int>::iterator itCounts;
111 set<string> groupsAddedForThisNode;
112 for (int j = 0; j < dupNames.size(); j++) {
114 string group = globaldata->gTreemap->getGroup(dupNames[j]);
116 if (dupNames[j] != name) {//you already added yourself in the constructor
118 if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it
121 itCounts = tree[i].pcount.find(group);
122 if (itCounts == tree[i].pcount.end()) { //new group, add it
123 tree[i].pcount[group] = 1;
125 tree[i].pcount[group]++;
129 itCounts = tree[i].pGroups.find(group);
130 if (itCounts == tree[i].pGroups.end()) { //new group, add it
131 tree[i].pGroups[group] = 1;
133 tree[i].pGroups[group]++;
137 if(tree[i].pGroups[group] > maxPars){
138 maxPars = tree[i].pGroups[group];
140 }else { groupsAddedForThisNode.insert(group); } //add it so you don't add it to groupNodeInfo again
143 if (maxPars > 1) { //then we have some more dominant groups
144 //erase all the groups that are less than maxPars because you found a more dominant group.
145 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
146 if(it->second < maxPars){
147 tree[i].pGroups.erase(it++);
150 //set one remaining groups to 1
151 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
152 tree[i].pGroups[it->first] = 1;
156 //update groups to reflect all the groups this node represents
157 vector<string> nodeGroups;
158 map<string, int>::iterator itGroups;
159 for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
160 nodeGroups.push_back(itGroups->first);
162 tree[i].setGroup(nodeGroups);
168 //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;
171 catch(exception& e) {
172 m->errorOut(e, "Tree", "addNamesToCounts");
176 /*****************************************************************/
177 int Tree::getIndex(string searchName) {
179 //Treemap knows name, group and index to speed up search
180 // getIndex function will return the vector index or -1 if seq is not found.
181 int index = globaldata->gTreemap->getIndex(searchName);
185 catch(exception& e) {
186 m->errorOut(e, "Tree", "getIndex");
190 /*****************************************************************/
192 void Tree::setIndex(string searchName, int index) {
194 //set index in treemap
195 globaldata->gTreemap->setIndex(searchName, index);
197 catch(exception& e) {
198 m->errorOut(e, "Tree", "setIndex");
202 /*****************************************************************/
203 int Tree::assembleTree() {
207 //if user has given a names file we want to include that info in the pgroups and pcount info.
208 if(globaldata->names.size() != 0) { addNamesToCounts(); }
210 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
211 for (int i = numLeaves; i < numNodes; i++) {
212 if (m->control_pressed) { return 1; }
214 tree[i].pGroups = (mergeGroups(i));
215 tree[i].pcount = (mergeGcounts(i));
218 //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
221 catch(exception& e) {
222 m->errorOut(e, "Tree", "assembleTree");
226 /*****************************************************************/
227 int Tree::assembleTree(string n) {
230 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
231 for (int i = numLeaves; i < numNodes; i++) {
232 if (m->control_pressed) { return 1; }
234 tree[i].pGroups = (mergeGroups(i));
235 tree[i].pcount = (mergeGcounts(i));
238 //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
241 catch(exception& e) {
242 m->errorOut(e, "Tree", "assembleTree");
247 /*****************************************************************/
248 void Tree::getCopy(Tree* copy) {
251 //for each node in the tree copy its info
252 for (int i = 0; i < numNodes; i++) {
254 tree[i].setName(copy->tree[i].getName());
257 tree[i].setGroup(copy->tree[i].getGroup());
260 tree[i].setBranchLength(copy->tree[i].getBranchLength());
263 tree[i].setParent(copy->tree[i].getParent());
266 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
268 //copy index in node and tmap
269 tree[i].setIndex(copy->tree[i].getIndex());
270 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
273 tree[i].pGroups = copy->tree[i].pGroups;
276 tree[i].pcount = copy->tree[i].pcount;
279 groupNodeInfo = copy->groupNodeInfo;
282 catch(exception& e) {
283 m->errorOut(e, "Tree", "getCopy");
287 /*****************************************************************/
288 //returns a map with a groupname and the number of times that group was seen in the children
289 //for instance if your children are white and black then it would return a map with 2 entries
290 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
291 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
293 map<string, int> Tree::mergeGroups(int i) {
295 int lc = tree[i].getLChild();
296 int rc = tree[i].getRChild();
298 //set parsimony groups to left child
299 map<string,int> parsimony = tree[lc].pGroups;
303 //look at right child groups and update maxPars if right child has something higher for that group.
304 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
305 it2 = parsimony.find(it->first);
306 if (it2 != parsimony.end()) {
307 parsimony[it->first]++;
309 parsimony[it->first] = 1;
312 if(parsimony[it->first] > maxPars){
313 maxPars = parsimony[it->first];
317 // this is true if right child had a greater parsimony for a certain group
319 //erase all the groups that are only 1 because you found something with 2.
320 for(it=parsimony.begin();it!=parsimony.end();){
322 parsimony.erase(it++);
325 //set one remaining groups to 1
326 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
327 for(it=parsimony.begin();it!=parsimony.end();it++){
328 parsimony[it->first] = 1;
335 catch(exception& e) {
336 m->errorOut(e, "Tree", "mergeGroups");
340 /*****************************************************************/
341 //returns a map with a groupname and the number of times that group was seen in the children
342 //for instance if your children are white and black then it would return a map with 2 entries
343 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
344 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
346 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
349 int lc = tree[i].getLChild();
350 int rc = tree[i].getRChild();
352 //loop through nodes groups removing the ones the user doesn't want
353 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
354 if (m->inUsersGroups(it->first, g) != true) {
355 tree[lc].pGroups.erase(it++);
359 //loop through nodes groups removing the ones the user doesn't want
360 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
361 if (m->inUsersGroups(it->first, g) != true) {
362 tree[rc].pGroups.erase(it++);
366 //set parsimony groups to left child
367 map<string,int> parsimony = tree[lc].pGroups;
371 //look at right child groups and update maxPars if right child has something higher for that group.
372 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
373 it2 = parsimony.find(it->first);
374 if (it2 != parsimony.end()) {
375 parsimony[it->first]++;
377 parsimony[it->first] = 1;
380 if(parsimony[it->first] > maxPars){
381 maxPars = parsimony[it->first];
385 // this is true if right child had a greater parsimony for a certain group
387 //erase all the groups that are only 1 because you found something with 2.
388 for(it=parsimony.begin();it!=parsimony.end();){
390 parsimony.erase(it++);
394 for(it=parsimony.begin();it!=parsimony.end();it++){
395 parsimony[it->first] = 1;
401 catch(exception& e) {
402 m->errorOut(e, "Tree", "mergeUserGroups");
408 /**************************************************************************************************/
410 map<string,int> Tree::mergeGcounts(int position) {
412 map<string,int>::iterator pos;
414 int lc = tree[position].getLChild();
415 int rc = tree[position].getRChild();
417 map<string,int> sum = tree[lc].pcount;
419 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
420 sum[it->first] += it->second;
424 catch(exception& e) {
425 m->errorOut(e, "Tree", "mergeGcounts");
429 /**************************************************************************************************/
431 void Tree::randomLabels(vector<string> g) {
434 //initialize groupNodeInfo
435 for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
436 groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
439 for(int i = 0; i < numLeaves; i++){
441 //get random index to switch with
442 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
444 //you only want to randomize the nodes that are from a group the user wants analyzed, so
445 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
448 treez = m->inUsersGroups(tree[z].getGroup(), g);
449 treei = m->inUsersGroups(tree[i].getGroup(), g);
451 if ((treez == true) && (treei == true)) {
452 //switches node i and node z's info.
453 map<string,int> lib_hold = tree[z].pGroups;
454 tree[z].pGroups = (tree[i].pGroups);
455 tree[i].pGroups = (lib_hold);
457 vector<string> zgroup = tree[z].getGroup();
458 tree[z].setGroup(tree[i].getGroup());
459 tree[i].setGroup(zgroup);
461 string zname = tree[z].getName();
462 tree[z].setName(tree[i].getName());
463 tree[i].setName(zname);
465 map<string,int> gcount_hold = tree[z].pcount;
466 tree[z].pcount = (tree[i].pcount);
467 tree[i].pcount = (gcount_hold);
470 for (int k = 0; k < (tree[i].getGroup()).size(); k++) { groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); }
471 for (int k = 0; k < (tree[z].getGroup()).size(); k++) { groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); }
474 catch(exception& e) {
475 m->errorOut(e, "Tree", "randomLabels");
479 /**************************************************************************************************
481 void Tree::randomLabels(string groupA, string groupB) {
483 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
484 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
486 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
487 for(int i=numSeqsA;i<randomGroups.size();i++){
488 randomGroups[i] = groupB;
490 random_shuffle(randomGroups.begin(), randomGroups.end());
492 int randomCounter = 0;
493 for(int i=0;i<numLeaves;i++){
494 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
495 tree[i].setGroup(randomGroups[randomCounter]);
496 tree[i].pcount.clear();
497 tree[i].pcount[randomGroups[randomCounter]] = 1;
498 tree[i].pGroups.clear();
499 tree[i].pGroups[randomGroups[randomCounter]] = 1;
504 catch(exception& e) {
505 m->errorOut(e, "Tree", "randomLabels");
509 /**************************************************************************************************/
510 void Tree::randomBlengths() {
512 for(int i=numNodes-1;i>=0;i--){
513 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
515 float bl_hold = tree[z].getBranchLength();
516 tree[z].setBranchLength(tree[i].getBranchLength());
517 tree[i].setBranchLength(bl_hold);
520 catch(exception& e) {
521 m->errorOut(e, "Tree", "randomBlengths");
525 /*************************************************************************************************/
526 void Tree::assembleRandomUnifracTree(vector<string> g) {
528 assembleTree("noNameCounts");
530 /*************************************************************************************************/
531 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
533 vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
535 assembleTree("noNameCounts");
538 /*************************************************************************************************/
539 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
540 void Tree::assembleRandomTree() {
544 /**************************************************************************************************/
546 void Tree::randomTopology() {
548 for(int i=0;i<numNodes;i++){
549 tree[i].setParent(-1);
551 for(int i=numLeaves;i<numNodes;i++){
552 tree[i].setChildren(-1, -1);
555 for(int i=numLeaves;i<numNodes;i++){
557 int rnd_index1, rnd_index2;
559 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
560 if(tree[rnd_index1].getParent() == -1){escape = 1;}
565 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
566 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
571 tree[i].setChildren(rnd_index1,rnd_index2);
572 tree[i].setParent(-1);
573 tree[rnd_index1].setParent(i);
574 tree[rnd_index2].setParent(i);
577 catch(exception& e) {
578 m->errorOut(e, "Tree", "randomTopology");
582 /*****************************************************************/
583 void Tree::print(ostream& out) {
585 int root = findRoot();
586 printBranch(root, out, "branch");
589 catch(exception& e) {
590 m->errorOut(e, "Tree", "print");
594 /*****************************************************************/
595 void Tree::printForBoot(ostream& out) {
597 int root = findRoot();
598 printBranch(root, out, "boot");
601 catch(exception& e) {
602 m->errorOut(e, "Tree", "printForBoot");
607 /*****************************************************************/
608 // This prints out the tree in Newick form.
609 void Tree::createNewickFile(string f) {
611 int root = findRoot();
612 //filename = m->getRootName(globaldata->getTreeFile()) + "newick";
615 m->openOutputFile(filename, out);
617 printBranch(root, out, "branch");
619 // you are at the end of the tree
623 catch(exception& e) {
624 m->errorOut(e, "Tree", "createNewickFile");
629 /*****************************************************************/
630 //This function finds the index of the root node.
632 int Tree::findRoot() {
634 for (int i = 0; i < numNodes; i++) {
636 if (tree[i].getParent() == -1) { return i; }
637 //cout << "i = " << i << endl;
638 //cout << "i's parent = " << tree[i].getParent() << endl;
642 catch(exception& e) {
643 m->errorOut(e, "Tree", "findRoot");
648 /*****************************************************************/
649 void Tree::printBranch(int node, ostream& out, string mode) {
652 // you are not a leaf
653 if (tree[node].getLChild() != -1) {
655 printBranch(tree[node].getLChild(), out, mode);
657 printBranch(tree[node].getRChild(), out, mode);
659 if (mode == "branch") {
660 //if there is a branch length then print it
661 if (tree[node].getBranchLength() != -1) {
662 out << ":" << tree[node].getBranchLength();
664 }else if (mode == "boot") {
665 //if there is a label then print it
666 if (tree[node].getLabel() != -1) {
667 out << tree[node].getLabel();
670 }else { //you are a leaf
671 string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
674 if (mode == "branch") {
675 //if there is a branch length then print it
676 if (tree[node].getBranchLength() != -1) {
677 out << ":" << tree[node].getBranchLength();
679 }else if (mode == "boot") {
680 //if there is a label then print it
681 if (tree[node].getLabel() != -1) {
682 out << tree[node].getLabel();
688 catch(exception& e) {
689 m->errorOut(e, "Tree", "printBranch");
694 /*****************************************************************/
696 void Tree::printTree() {
698 for(int i=0;i<numNodes;i++){
705 /*****************************************************************/
706 //this code is a mess and should be rethought...-slw
707 void Tree::parseTreeFile() {
709 //only takes names from the first tree and assumes that all trees use the same names.
711 string filename = globaldata->getTreeFile();
713 m->openInputFile(filename, filehandle);
718 //ifyou are not a nexus file
719 if((c = filehandle.peek()) != '#') {
720 while((c = filehandle.peek()) != ';') {
721 while ((c = filehandle.peek()) != ';') {
729 if((c == '(') && (comment != 1)){ break; }
733 done = readTreeString(filehandle);
734 if (done == 0) { break; }
736 //ifyou are a nexus file
737 }else if((c = filehandle.peek()) == '#') {
741 while(holder != "translate" && holder != "Translate"){
742 if(holder == "[" || holder == "[!"){
748 filehandle >> holder;
750 //if there is no translate then you must read tree string otherwise use translate to get names
751 if((holder == "tree") && (comment != 1)){
752 //pass over the "tree rep.6878900 = "
753 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
755 if(c == EOF) { break; }
756 filehandle.putback(c); //put back first ( of tree.
757 done = readTreeString(filehandle);
762 if (done == 0) { break; }
765 //use nexus translation rather than parsing tree to save time
766 if((holder == "translate") || (holder == "Translate")) {
768 string number, name, h;
769 h = ""; // so it enters the loop the first time
770 while((h != ";") && (number != ";")) {
771 filehandle >> number;
774 //c = , until done with translation then c = ;
775 h = name.substr(name.length()-1, name.length());
776 name.erase(name.end()-1); //erase the comma
777 globaldata->Treenames.push_back(number);
779 if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
784 //for (int i = 0; i < globaldata->Treenames.size(); i++) {
785 //cout << globaldata->Treenames[i] << endl; }
786 //cout << globaldata->Treenames.size() << endl;
788 catch(exception& e) {
789 m->errorOut(e, "Tree", "parseTreeFile");
793 /*******************************************************/
795 /*******************************************************/
796 int Tree::readTreeString(ifstream& filehandle) {
801 while((c = filehandle.peek()) != ';') {
803 //cout << " at beginning of while " << k << endl;
805 //to pass over labels in trees
807 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
808 filehandle.putback(c);
810 if(c == ';') { return 0; }
811 if(c == -1) { return 0; }
813 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
815 c = filehandle.get();
818 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
820 c = filehandle.get();
822 //cout << " in name while " << k << endl;
825 //cout << "name = " << name << endl;
826 globaldata->Treenames.push_back(name);
827 filehandle.putback(c);
829 //cout << " after putback" << k << endl;
832 if(c == ':') { //read until you reach the end of the branch length
833 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
834 c = filehandle.get();
836 //cout << " in branch while " << k << endl;
838 filehandle.putback(c);
841 c = filehandle.get();
843 //cout << " here after get " << k << endl;
844 if(c == ';') { return 0; }
845 if(c == ')') { filehandle.putback(c); }
852 catch(exception& e) {
853 m->errorOut(e, "Tree", "readTreeString");
858 /*******************************************************/
860 /*******************************************************/