5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /*****************************************************************/
// Parse-only constructor: fetches the GlobalData and MothurOut singletons, parses the
// tree file, and clears globaldata->runParse. The parameter g is not used in the
// lines visible here.
13 Tree::Tree(string g) {
15 globaldata = GlobalData::getInstance();
16 m = MothurOut::getInstance();
// Parse once, then clear the flag that is checked elsewhere before calling parseTreeFile().
18 parseTreeFile(); globaldata->runParse = false;
// Error report tagged with class "Tree" and location "Tree - just parse".
// NOTE(review): the try/catch lines around this call are not visible in this listing.
21 m->errorOut(e, "Tree", "Tree - just parse");
25 /*****************************************************************/
// NOTE(review): the function header for this body is not visible in this listing; the
// errorOut tag at the end ("Tree", "Tree") suggests it is a Tree constructor - confirm.
// Sizes the node vector from globaldata->Treenames and initializes every node:
// leaves get their name, group and counts; the remaining nodes get an empty group list.
28 globaldata = GlobalData::getInstance();
29 m = MothurOut::getInstance();
// Only parse the tree file if nobody has parsed it yet.
31 if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; }
32 //for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
// One leaf per parsed name; the node total is 2*numLeaves - 1.
33 numLeaves = globaldata->Treenames.size();
34 numNodes = 2*numLeaves - 1;
36 tree.resize(numNodes);
38 //initialize tree with correct number of nodes, name and group info.
39 for (int i = 0; i < numNodes; i++) {
40 //initialize leaf nodes
// Leaves occupy indices 0 .. numLeaves-1.
41 if (i <= (numLeaves-1)) {
42 tree[i].setName(globaldata->Treenames[i]);
// The leaf's group comes from the treemap entry for its name.
43 vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
44 tree[i].setGroup(tempGroups);
45 //set pcount and pGroup for groupname to 1.
46 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
47 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
48 //Treemap knows name, group and index to speed up search
49 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
51 //initialize non leaf nodes
// This condition is the exact complement of the leaf test above.
52 }else if (i > (numLeaves-1)) {
// Non-leaf nodes start with an empty group list.
54 vector<string> tempGroups;
55 tree[i].setGroup(tempGroups);
60 m->errorOut(e, "Tree", "Tree");
65 /*****************************************************************/
67 /*****************************************************************/
// Folds the names-file information into each leaf: for every leaf, the duplicate
// sequence names listed for it in globaldata->names are looked up in the treemap and
// their groups are added to the leaf's pcount and pGroups; the leaf's group list is
// then rebuilt from the keys of pcount.
// Terminates the process with exit(1) if a leaf name is missing from globaldata->names.
68 void Tree::addNamesToCounts() {
70 //ex. seq1 seq2,seq3,seq4
76 //before this function seq1.pcount = pasture -> 1
77 //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
79 //before this function seq1.pgroups = pasture -> 1
80 //after seq1.pgroups = pasture -> 1 since that is the dominant group
83 //go through each leaf and update its pcounts and pgroups
87 for (int i = 0; i < numLeaves; i++) {
89 string name = tree[i].getName();
91 map<string, string>::iterator itNames = globaldata->names.find(name);
// A leaf that is absent from the names file is fatal: message, then exit(1).
93 if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); }
95 vector<string> dupNames;
// The names entry is a comma-separated list; split it into individual names.
// NOTE(review): itNames already points at this entry, so names[name] is a second lookup.
96 m->splitAtComma(globaldata->names[name], dupNames);
98 map<string, int>::iterator itCounts;
// NOTE(review): the declarations of maxPars and it are not visible in this listing.
100 for (int j = 0; j < dupNames.size(); j++) {
102 if (dupNames[j] != name) {//you already added yourself in the constructor
103 string group = globaldata->gTreemap->getGroup(dupNames[j]);
// Count one more sequence of this group under the leaf.
106 itCounts = tree[i].pcount.find(group);
107 if (itCounts == tree[i].pcount.end()) { //new group, add it
108 tree[i].pcount[group] = 1;
110 tree[i].pcount[group]++;
// Same bookkeeping for the parsimony groups.
114 itCounts = tree[i].pGroups.find(group);
115 if (itCounts == tree[i].pGroups.end()) { //new group, add it
116 tree[i].pGroups[group] = 1;
118 tree[i].pGroups[group]++;
// Track the largest pGroups count seen for this leaf.
122 if(tree[i].pGroups[group] > maxPars){
123 maxPars = tree[i].pGroups[group];
128 if (maxPars > 1) { //then we have some more dominant groups
129 //erase all the groups that are less than maxPars because you found a more dominant group.
// No increment in the for header: erase(it++) advances the iterator before the
// erased element is removed.
130 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
131 if(it->second < maxPars){
132 tree[i].pGroups.erase(it++);
135 //set the remaining groups to 1
136 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
137 tree[i].pGroups[it->first] = 1;
141 //update groups to reflect all the groups this node represents
142 vector<string> nodeGroups;
143 map<string, int>::iterator itGroups;
144 for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
145 nodeGroups.push_back(itGroups->first);
147 tree[i].setGroup(nodeGroups);
153 //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;
156 catch(exception& e) {
157 m->errorOut(e, "Tree", "addNamesToCounts");
161 /*****************************************************************/
// Looks up searchName in the shared treemap (globaldata->gTreemap) and stores the
// result in index; per the comment below, the treemap reports -1 for an unknown name.
162 int Tree::getIndex(string searchName) {
164 //Treemap knows name, group and index to speed up search
165 // getIndex function will return the vector index or -1 if seq is not found.
166 int index = globaldata->gTreemap->getIndex(searchName);
// NOTE(review): the statement that returns index is not visible in this listing.
170 catch(exception& e) {
171 m->errorOut(e, "Tree", "getIndex");
175 /*****************************************************************/
// Records index as the node position of searchName in the shared treemap
// (globaldata->gTreemap); nothing on the Tree object itself is changed here.
177 void Tree::setIndex(string searchName, int index) {
179 //set index in treemap
180 globaldata->gTreemap->setIndex(searchName, index);
182 catch(exception& e) {
183 m->errorOut(e, "Tree", "setIndex");
187 /*****************************************************************/
// Fills in pGroups and pcount for every non-leaf node by merging its children's values.
// Returns 1 early when m->control_pressed is set; the normal return value is not
// visible in this listing.
188 int Tree::assembleTree() {
192 //if user has given a names file we want to include that info in the pgroups and pcount info.
193 if(globaldata->names.size() != 0) { addNamesToCounts(); }
195 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
// Non-leaf nodes are visited in increasing index order.
// NOTE(review): presumably each node's children have smaller indices, so their
// values are already final when they are merged - verify.
196 for (int i = numLeaves; i < numNodes; i++) {
// Abort early if the user interrupted the run.
197 if (m->control_pressed) { return 1; }
199 tree[i].pGroups = (mergeGroups(i));
200 tree[i].pcount = (mergeGcounts(i));
203 //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
206 catch(exception& e) {
207 m->errorOut(e, "Tree", "assembleTree");
211 /*****************************************************************/
// Copies node data from *copy into this tree, node by node: name, group, branch
// length, parent, children, index, pGroups and pcount.
// The loop runs over this tree's numNodes and indexes copy->tree with the same i, so
// copy must hold at least that many nodes.
212 void Tree::getCopy(Tree* copy) {
215 //for each node in the tree copy its info
216 for (int i = 0; i < numNodes; i++) {
218 tree[i].setName(copy->tree[i].getName());
221 tree[i].setGroup(copy->tree[i].getGroup());
224 tree[i].setBranchLength(copy->tree[i].getBranchLength());
227 tree[i].setParent(copy->tree[i].getParent());
230 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
232 //copy index in node and tmap
233 tree[i].setIndex(copy->tree[i].getIndex());
// NOTE(review): getIndex reads the treemap and setIndex writes the same treemap, so
// this stores back the value just read and looks like a no-op - confirm intent.
234 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
237 tree[i].pGroups = copy->tree[i].pGroups;
240 tree[i].pcount = copy->tree[i].pcount;
244 catch(exception& e) {
245 m->errorOut(e, "Tree", "getCopy");
249 /*****************************************************************/
250 //returns a map with a groupname and the number of times that group was seen in the children
251 //for instance if your children are white and black then it would return a map with 2 entries
252 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
253 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// i is the index of the node whose two children are merged; the children themselves
// are only read, never modified.
255 map<string, int> Tree::mergeGroups(int i) {
257 int lc = tree[i].getLChild();
258 int rc = tree[i].getRChild();
260 //set parsimony groups to left child
261 map<string,int> parsimony = tree[lc].pGroups;
// NOTE(review): the declarations of maxPars, it and it2 are not visible in this listing.
265 //look at right child groups and update maxPars if right child has something higher for that group.
266 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
267 it2 = parsimony.find(it->first);
// A group present in both children is incremented; a group only in the right child is added with 1.
268 if (it2 != parsimony.end()) {
269 parsimony[it->first]++;
271 parsimony[it->first] = 1;
274 if(parsimony[it->first] > maxPars){
275 maxPars = parsimony[it->first];
279 // this is true if right child had a greater parsimony for a certain group
281 //erase all the groups that are only 1 because you found something with 2.
// No increment in the for header: erase(it++) advances the iterator before erasing.
// NOTE(review): the condition guarding the erase is not visible in this listing.
282 for(it=parsimony.begin();it!=parsimony.end();){
284 parsimony.erase(it++);
287 //set the remaining groups to 1
288 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
289 for(it=parsimony.begin();it!=parsimony.end();it++){
290 parsimony[it->first] = 1;
297 catch(exception& e) {
298 m->errorOut(e, "Tree", "mergeGroups");
302 /*****************************************************************/
303 //returns a map with a groupname and the number of times that group was seen in the children
304 //for instance if your children are white and black then it would return a map with 2 entries
305 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
306 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Unlike a plain merge, this version first removes every group that is not in g from
// BOTH children's pGroups. The removal is done on the child nodes themselves, so the
// children stay filtered after the call.
308 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
311 int lc = tree[i].getLChild();
312 int rc = tree[i].getRChild();
314 //loop through the left child's groups removing the ones the user doesn't want
// No increment in the for header: erase(it++) advances the iterator before erasing.
315 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
316 if (m->inUsersGroups(it->first, g) != true) {
317 tree[lc].pGroups.erase(it++);
321 //loop through the right child's groups removing the ones the user doesn't want
322 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
323 if (m->inUsersGroups(it->first, g) != true) {
324 tree[rc].pGroups.erase(it++);
328 //set parsimony groups to left child
329 map<string,int> parsimony = tree[lc].pGroups;
// NOTE(review): the declarations of maxPars, it and it2 are not visible in this listing.
333 //look at right child groups and update maxPars if right child has something higher for that group.
334 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
335 it2 = parsimony.find(it->first);
// A group present in both children is incremented; a group only in the right child is added with 1.
336 if (it2 != parsimony.end()) {
337 parsimony[it->first]++;
339 parsimony[it->first] = 1;
342 if(parsimony[it->first] > maxPars){
343 maxPars = parsimony[it->first];
347 // this is true if right child had a greater parsimony for a certain group
349 //erase all the groups that are only 1 because you found something with 2.
// NOTE(review): the condition guarding the erase is not visible in this listing.
350 for(it=parsimony.begin();it!=parsimony.end();){
352 parsimony.erase(it++);
// Every group that survives is reset to 1.
356 for(it=parsimony.begin();it!=parsimony.end();it++){
357 parsimony[it->first] = 1;
363 catch(exception& e) {
364 m->errorOut(e, "Tree", "mergeUserGroups");
370 /**************************************************************************************************/
// Builds the per-group sequence counts for the node at index position by adding the
// right child's pcount entries onto a copy of the left child's pcount.
372 map<string,int> Tree::mergeGcounts(int position) {
// NOTE(review): pos is not used in any line visible here.
374 map<string,int>::iterator pos;
376 int lc = tree[position].getLChild();
377 int rc = tree[position].getRChild();
// Start from the left child's counts ...
379 map<string,int> sum = tree[lc].pcount;
// ... and add the right child's; operator[] creates a zero entry for groups that only
// the right child has, so += works for both cases.
381 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
382 sum[it->first] += it->second;
// NOTE(review): the statement returning sum is not visible in this listing.
386 catch(exception& e) {
387 m->errorOut(e, "Tree", "mergeGcounts");
391 /**************************************************************************************************/
// Randomly swaps the label data (pGroups, group list, name, pcount) between leaves.
// A pair of leaves is only swapped when both pass m->inUsersGroups against g.
// Uses rand(), so the result depends on the global C random state.
393 void Tree::randomLabels(vector<string> g) {
396 for(int i = 0; i < numLeaves; i++){
398 //get random index to switch with
// Dividing by RAND_MAX+1.0 keeps the quotient below 1, so z lies in 0..i.
399 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
401 //you only want to randomize the nodes that are from a group the user wants analyzed, so
402 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
405 treez = m->inUsersGroups(tree[z].getGroup(), g);
406 treei = m->inUsersGroups(tree[i].getGroup(), g);
408 if ((treez == true) && (treei == true)) {
409 //switches node i and node z's info.
410 map<string,int> lib_hold = tree[z].pGroups;
411 tree[z].pGroups = (tree[i].pGroups);
412 tree[i].pGroups = (lib_hold);
414 vector<string> zgroup = tree[z].getGroup();
415 tree[z].setGroup(tree[i].getGroup());
416 tree[i].setGroup(zgroup);
418 string zname = tree[z].getName();
419 tree[z].setName(tree[i].getName());
420 tree[i].setName(zname);
422 map<string,int> gcount_hold = tree[z].pcount;
423 tree[z].pcount = (tree[i].pcount);
424 tree[i].pcount = (gcount_hold);
428 catch(exception& e) {
429 m->errorOut(e, "Tree", "randomLabels");
433 /**************************************************************************************************
435 void Tree::randomLabels(string groupA, string groupB) {
437 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
438 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
440 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
441 for(int i=numSeqsA;i<randomGroups.size();i++){
442 randomGroups[i] = groupB;
444 random_shuffle(randomGroups.begin(), randomGroups.end());
446 int randomCounter = 0;
447 for(int i=0;i<numLeaves;i++){
448 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
449 tree[i].setGroup(randomGroups[randomCounter]);
450 tree[i].pcount.clear();
451 tree[i].pcount[randomGroups[randomCounter]] = 1;
452 tree[i].pGroups.clear();
453 tree[i].pGroups[randomGroups[randomCounter]] = 1;
458 catch(exception& e) {
459 m->errorOut(e, "Tree", "randomLabels");
463 /**************************************************************************************************/
// Shuffles the branch lengths among all nodes: walking i from the last node down to
// 0, the branch length of node i is swapped with that of a random node z in 0..i.
// Uses rand(), so the result depends on the global C random state.
464 void Tree::randomBlengths() {
466 for(int i=numNodes-1;i>=0;i--){
// Dividing by RAND_MAX+1.0 keeps the quotient below 1, so z never exceeds i.
467 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
469 float bl_hold = tree[z].getBranchLength();
470 tree[z].setBranchLength(tree[i].getBranchLength());
471 tree[i].setBranchLength(bl_hold);
474 catch(exception& e) {
475 m->errorOut(e, "Tree", "randomBlengths");
479 /*************************************************************************************************/
// Takes the list of group names g.
// NOTE(review): the body of this function is not visible in this listing, so what it
// does with g cannot be documented from here.
480 void Tree::assembleRandomUnifracTree(vector<string> g) {
484 /*************************************************************************************************/
// Two-group variant: packs groupA and groupB into a vector<string>.
// NOTE(review): the statements that use temp are not visible in this listing.
485 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
487 vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
492 /*************************************************************************************************/
493 //for now it's just random topology but may become random labels as well later; that's why this is such a simple function now...
// NOTE(review): the body of this function is not visible in this listing.
494 void Tree::assembleRandomTree() {
498 /**************************************************************************************************/
// Rewires the tree into a random topology. All parent links and all children of
// non-leaf nodes are reset to -1; then every non-leaf node i, in increasing index
// order, is given two distinct children drawn at random from the nodes with a smaller
// index that do not have a parent yet.
// Uses rand(), so the result depends on the global C random state.
500 void Tree::randomTopology() {
// Detach every node from its parent.
502 for(int i=0;i<numNodes;i++){
503 tree[i].setParent(-1);
// Clear the children of every non-leaf node.
505 for(int i=numLeaves;i<numNodes;i++){
506 tree[i].setChildren(-1, -1);
509 for(int i=numLeaves;i<numNodes;i++){
511 int rnd_index1, rnd_index2;
// Draw from 0..i-1. The divisor is RAND_MAX+1.0 so the quotient stays below 1; with a
// divisor of RAND_MAX a draw of rand()==RAND_MAX produced index i, and node i (whose
// parent is still -1 at this point) could be chosen as its own child.
513 rnd_index1 = (int)(((double)rand() / ((double) RAND_MAX + 1.0))*i);
514 if(tree[rnd_index1].getParent() == -1){escape = 1;}
// Second child: same draw, but it must differ from the first and still be parentless.
519 rnd_index2 = (int)(((double)rand() / ((double) RAND_MAX + 1.0))*i);
520 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
// Link the two chosen nodes under i; i itself stays parentless until a later node adopts it.
525 tree[i].setChildren(rnd_index1,rnd_index2);
526 tree[i].setParent(-1);
527 tree[rnd_index1].setParent(i);
528 tree[rnd_index2].setParent(i);
531 catch(exception& e) {
532 m->errorOut(e, "Tree", "randomTopology");
536 /*****************************************************************/
// Writes the tree to out, starting at the root returned by findRoot() and using
// printBranch in "branch" mode (branch lengths are printed).
537 void Tree::print(ostream& out) {
539 int root = findRoot();
540 printBranch(root, out, "branch");
543 catch(exception& e) {
544 m->errorOut(e, "Tree", "print");
548 /*****************************************************************/
// Writes the tree to out, starting at the root returned by findRoot() and using
// printBranch in "boot" mode (node labels are printed instead of branch lengths).
549 void Tree::printForBoot(ostream& out) {
551 int root = findRoot();
552 printBranch(root, out, "boot");
555 catch(exception& e) {
556 m->errorOut(e, "Tree", "printForBoot");
561 /*****************************************************************/
562 // This prints out the tree in Newick form.
// Opens an output file through m->openOutputFile and writes the tree into it with
// printBranch in "branch" mode, starting from the root.
// NOTE(review): the lines that declare out and assign filename (presumably from the
// parameter f) are not visible in this listing - confirm.
563 void Tree::createNewickFile(string f) {
565 int root = findRoot();
566 //filename = m->getRootName(globaldata->getTreeFile()) + "newick";
569 m->openOutputFile(filename, out);
571 printBranch(root, out, "branch");
573 // you are at the end of the tree
577 catch(exception& e) {
578 m->errorOut(e, "Tree", "createNewickFile");
583 /*****************************************************************/
584 //This function finds the index of the root node.
// The root is the first node, scanning from index 0, whose parent is -1.
// NOTE(review): what is returned when no such node exists is not visible in this listing.
586 int Tree::findRoot() {
588 for (int i = 0; i < numNodes; i++) {
590 if (tree[i].getParent() == -1) { return i; }
591 //cout << "i = " << i << endl;
592 //cout << "i's parent = " << tree[i].getParent() << endl;
596 catch(exception& e) {
597 m->errorOut(e, "Tree", "findRoot");
602 /*****************************************************************/
// Recursively writes the subtree rooted at node to out.
// mode selects the annotation: "branch" prints ":" followed by the branch length,
// "boot" prints the node label. In both modes a value of -1 means "not set" and
// nothing is printed for it.
// NOTE(review): the statements that emit the parentheses, the separator and the leaf
// text are not visible in this listing.
603 void Tree::printBranch(int node, ostream& out, string mode) {
606 // you are not a leaf
// A node with a left child is treated as an internal node.
607 if (tree[node].getLChild() != -1) {
// Left subtree first, then right subtree.
609 printBranch(tree[node].getLChild(), out, mode);
611 printBranch(tree[node].getRChild(), out, mode);
613 if (mode == "branch") {
614 //if there is a branch length then print it
615 if (tree[node].getBranchLength() != -1) {
616 out << ":" << tree[node].getBranchLength();
618 }else if (mode == "boot") {
619 //if there is a label then print it
620 if (tree[node].getLabel() != -1) {
621 out << tree[node].getLabel();
624 }else { //you are a leaf
// The leaf's group is looked up by name in the treemap.
625 string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
628 if (mode == "branch") {
629 //if there is a branch length then print it
630 if (tree[node].getBranchLength() != -1) {
631 out << ":" << tree[node].getBranchLength();
633 }else if (mode == "boot") {
634 //if there is a label then print it
635 if (tree[node].getLabel() != -1) {
636 out << tree[node].getLabel();
642 catch(exception& e) {
643 m->errorOut(e, "Tree", "printBranch");
648 /*****************************************************************/
// Iterates over every node index from 0 to numNodes-1.
// NOTE(review): the loop body is not visible in this listing, so what is printed per
// node cannot be documented from here.
650 void Tree::printTree() {
652 for(int i=0;i<numNodes;i++){
659 /*****************************************************************/
660 //this code is a mess and should be rethought...-slw
// Reads the tree file named by globaldata->getTreeFile() and collects the sequence
// names into globaldata->Treenames. Two formats are handled, chosen by the first
// character of the file: '#' means nexus, anything else is read as a plain tree string.
// NOTE(review): the declarations of filehandle, c, comment, done and holder are not
// visible in this listing.
661 void Tree::parseTreeFile() {
663 //only takes names from the first tree and assumes that all trees use the same names.
665 string filename = globaldata->getTreeFile();
667 m->openInputFile(filename, filehandle);
672 //if you are not a nexus file
673 if((c = filehandle.peek()) != '#') {
// NOTE(review): two nested loops with the identical condition; the statements between
// them are not visible here - confirm the nesting is intended.
674 while((c = filehandle.peek()) != ';') {
675 while ((c = filehandle.peek()) != ';') {
// Stop skipping as soon as the tree's opening '(' is reached outside a comment.
683 if((c == '(') && (comment != 1)){ break; }
// readTreeString returning 0 ends the scan.
687 done = readTreeString(filehandle);
688 if (done == 0) { break; }
690 //if you are a nexus file
691 }else if((c = filehandle.peek()) == '#') {
// Read whitespace-separated tokens until the translate table is found.
695 while(holder != "translate" && holder != "Translate"){
// "[" or "[!" is a comment-opening token.
696 if(holder == "[" || holder == "[!"){
702 filehandle >> holder;
704 //if there is no translate then you must read tree string otherwise use translate to get names
705 if((holder == "tree") && (comment != 1)){
706 //pass over the "tree rep.6878900 = "
// Consume characters up to and including the first '(' (or until EOF is next).
707 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
709 if(c == EOF) { break; }
710 filehandle.putback(c); //put back first ( of tree.
711 done = readTreeString(filehandle);
716 if (done == 0) { break; }
719 //use nexus translation rather than parsing tree to save time
720 if((holder == "translate") || (holder == "Translate")) {
722 string number, name, h;
723 h = ""; // so it enters the loop the first time
724 while((h != ";") && (number != ";")) {
725 filehandle >> number;
728 //c = , until done with translation then c = ;
// h receives the last character of name.
// NOTE(review): if name is empty, name.length()-1 wraps around and substr throws
// std::out_of_range; the read that fills name is not visible in this listing.
729 h = name.substr(name.length()-1, name.length());
730 name.erase(name.end()-1); //erase the comma
// NOTE(review): the token stored in Treenames is number, not name - confirm intended.
731 globaldata->Treenames.push_back(number);
733 if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
738 //for (int i = 0; i < globaldata->Treenames.size(); i++) {
739 //cout << globaldata->Treenames[i] << endl; }
740 //cout << globaldata->Treenames.size() << endl;
742 catch(exception& e) {
743 m->errorOut(e, "Tree", "parseTreeFile");
747 /*******************************************************/
749 /*******************************************************/
// Scans one tree string from filehandle and appends every sequence name it finds to
// globaldata->Treenames. Labels and branch lengths are skipped.
// Returns 0 when the terminating ';' or end of input (-1) is reached; any other return
// value is not visible in this listing.
// NOTE(review): the declarations of c and name are not visible in this listing.
750 int Tree::readTreeString(ifstream& filehandle) {
755 while((c = filehandle.peek()) != ';') {
757 //cout << " at beginning of while " << k << endl;
759 //to pass over labels in trees
// Consume characters until ',', ':', ';' or end of input, then push the stopping
// character back so the code below sees it again.
// NOTE(review): the condition that guards this skip is not visible in this listing.
761 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
762 filehandle.putback(c);
764 if(c == ';') { return 0; }
765 if(c == -1) { return 0; }
// Anything that is not a delimiter or whitespace starts a sequence name.
767 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
769 c = filehandle.get();
// Keep reading until the next delimiter or whitespace character ends the name.
772 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
774 c = filehandle.get();
776 //cout << " in name while " << k << endl;
779 //cout << "name = " << name << endl;
780 globaldata->Treenames.push_back(name);
// The delimiter that ended the name goes back into the stream.
781 filehandle.putback(c);
783 //cout << " after putback" << k << endl;
786 if(c == ':') { //read until you reach the end of the branch length
787 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
788 c = filehandle.get();
790 //cout << " in branch while " << k << endl;
792 filehandle.putback(c);
795 c = filehandle.get();
797 //cout << " here after get " << k << endl;
798 if(c == ';') { return 0; }
// A ')' is pushed back so the next pass of the outer loop sees it again.
799 if(c == ')') { filehandle.putback(c); }
806 catch(exception& e) {
807 m->errorOut(e, "Tree", "readTreeString");
812 /*******************************************************/
814 /*******************************************************/