5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 /*****************************************************************/
16 globaldata = GlobalData::getInstance();
17 m = MothurOut::getInstance();
19 if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; }
20 //for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
21 numLeaves = globaldata->Treenames.size();
22 numNodes = 2*numLeaves - 1;
24 tree.resize(numNodes);
26 //initialize tree with correct number of nodes, name and group info.
27 for (int i = 0; i < numNodes; i++) {
28 //initialize leaf nodes
29 if (i <= (numLeaves-1)) {
30 tree[i].setName(globaldata->Treenames[i]);
31 vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
32 tree[i].setGroup(tempGroups);
33 //set pcount and pGroup for groupname to 1.
34 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
35 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
36 //Treemap knows name, group and index to speed up search
37 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
39 //intialize non leaf nodes
40 }else if (i > (numLeaves-1)) {
42 vector<string> tempGroups;
43 tree[i].setGroup(tempGroups);
48 m->errorOut(e, "Tree", "Tree");
53 /*****************************************************************/
55 /*****************************************************************/
56 void Tree::addNamesToCounts() {
58 //ex. seq1 seq2,seq3,se4
64 //before this function seq1.pcount = pasture -> 1
65 //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
67 //before this function seq1.pgroups = pasture -> 1
68 //after seq1.pgroups = pasture -> 1 since that is the dominant group
71 //go through each leaf and update its pcounts and pgroups
72 for (int i = 0; i < numLeaves; i++) {
74 string name = tree[i].getName();
76 map<string, string>::iterator itNames = globaldata->names.find(name);
78 if (itNames == globaldata->names.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); }
80 vector<string> dupNames;
81 splitAtComma(globaldata->names[name], dupNames);
83 map<string, int>::iterator itCounts;
85 for (int j = 0; j < dupNames.size(); j++) {
87 if (dupNames[j] != name) {//you already added yourself in the constructor
88 string group = globaldata->gTreemap->getGroup(dupNames[j]);
91 itCounts = tree[i].pcount.find(group);
92 if (itCounts == tree[i].pcount.end()) { //new group, add it
93 tree[i].pcount[group] = 1;
95 tree[i].pcount[group]++;
99 itCounts = tree[i].pGroups.find(group);
100 if (itCounts == tree[i].pGroups.end()) { //new group, add it
101 tree[i].pGroups[group] = 1;
103 tree[i].pGroups[group]++;
107 if(tree[i].pGroups[group] > maxPars){
108 maxPars = tree[i].pGroups[group];
113 if (maxPars > 1) { //then we have some more dominant groups
114 //erase all the groups that are less than maxPars because you found a more dominant group.
115 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
116 if(it->second < maxPars){
117 tree[i].pGroups.erase(it++);
120 //set one remaining groups to 1
121 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
122 tree[i].pGroups[it->first] = 1;
126 //update groups to reflect all the groups this node represents
127 vector<string> nodeGroups;
128 map<string, int>::iterator itGroups;
129 for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
130 nodeGroups.push_back(itGroups->first);
132 tree[i].setGroup(nodeGroups);
137 catch(exception& e) {
138 m->errorOut(e, "Tree", "addNamesToCounts");
142 /*****************************************************************/
143 int Tree::getIndex(string searchName) {
145 //Treemap knows name, group and index to speed up search
146 // getIndex function will return the vector index or -1 if seq is not found.
147 int index = globaldata->gTreemap->getIndex(searchName);
151 catch(exception& e) {
152 m->errorOut(e, "Tree", "getIndex");
156 /*****************************************************************/
158 void Tree::setIndex(string searchName, int index) {
160 //set index in treemap
161 globaldata->gTreemap->setIndex(searchName, index);
163 catch(exception& e) {
164 m->errorOut(e, "Tree", "setIndex");
168 /*****************************************************************/
169 int Tree::assembleTree() {
172 //if user has given a names file we want to include that info in the pgroups and pcount info.
173 if(globaldata->names.size() != 0) { addNamesToCounts(); }
175 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
176 for (int i = numLeaves; i < numNodes; i++) {
177 if (m->control_pressed) { return 1; }
179 tree[i].pGroups = (mergeGroups(i));
180 tree[i].pcount = (mergeGcounts(i));
185 catch(exception& e) {
186 m->errorOut(e, "Tree", "assembleTree");
190 /*****************************************************************/
191 void Tree::getCopy(Tree* copy) {
194 //for each node in the tree copy its info
195 for (int i = 0; i < numNodes; i++) {
197 tree[i].setName(copy->tree[i].getName());
200 tree[i].setGroup(copy->tree[i].getGroup());
203 tree[i].setBranchLength(copy->tree[i].getBranchLength());
206 tree[i].setParent(copy->tree[i].getParent());
209 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
211 //copy index in node and tmap
212 tree[i].setIndex(copy->tree[i].getIndex());
213 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
216 tree[i].pGroups = copy->tree[i].pGroups;
219 tree[i].pcount = copy->tree[i].pcount;
223 catch(exception& e) {
224 m->errorOut(e, "Tree", "getCopy");
228 /*****************************************************************/
229 //returns a map with a groupname and the number of times that group was seen in the children
230 //for instance if your children are white and black then it would return a map with 2 entries
231 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
232 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
234 map<string, int> Tree::mergeGroups(int i) {
236 int lc = tree[i].getLChild();
237 int rc = tree[i].getRChild();
239 //set parsimony groups to left child
240 map<string,int> parsimony = tree[lc].pGroups;
244 //look at right child groups and update maxPars if right child has something higher for that group.
245 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
246 it2 = parsimony.find(it->first);
247 if (it2 != parsimony.end()) {
248 parsimony[it->first]++;
250 parsimony[it->first] = 1;
253 if(parsimony[it->first] > maxPars){
254 maxPars = parsimony[it->first];
258 // this is true if right child had a greater parsimony for a certain group
260 //erase all the groups that are only 1 because you found something with 2.
261 for(it=parsimony.begin();it!=parsimony.end();){
263 parsimony.erase(it++);
266 //set one remaining groups to 1
267 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
268 for(it=parsimony.begin();it!=parsimony.end();it++){
269 parsimony[it->first] = 1;
276 catch(exception& e) {
277 m->errorOut(e, "Tree", "mergeGroups");
281 /*****************************************************************/
282 //returns a map with a groupname and the number of times that group was seen in the children
283 //for instance if your children are white and black then it would return a map with 2 entries
284 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
285 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
287 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
290 int lc = tree[i].getLChild();
291 int rc = tree[i].getRChild();
293 //loop through nodes groups removing the ones the user doesn't want
294 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
295 if (inUsersGroups(it->first, g) != true) {
296 tree[lc].pGroups.erase(it++);
300 //loop through nodes groups removing the ones the user doesn't want
301 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
302 if (inUsersGroups(it->first, g) != true) {
303 tree[rc].pGroups.erase(it++);
307 //set parsimony groups to left child
308 map<string,int> parsimony = tree[lc].pGroups;
312 //look at right child groups and update maxPars if right child has something higher for that group.
313 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
314 it2 = parsimony.find(it->first);
315 if (it2 != parsimony.end()) {
316 parsimony[it->first]++;
318 parsimony[it->first] = 1;
321 if(parsimony[it->first] > maxPars){
322 maxPars = parsimony[it->first];
326 // this is true if right child had a greater parsimony for a certain group
328 //erase all the groups that are only 1 because you found something with 2.
329 for(it=parsimony.begin();it!=parsimony.end();){
331 parsimony.erase(it++);
335 for(it=parsimony.begin();it!=parsimony.end();it++){
336 parsimony[it->first] = 1;
342 catch(exception& e) {
343 m->errorOut(e, "Tree", "mergeUserGroups");
349 /**************************************************************************************************/
351 map<string,int> Tree::mergeGcounts(int position) {
353 map<string,int>::iterator pos;
355 int lc = tree[position].getLChild();
356 int rc = tree[position].getRChild();
358 map<string,int> sum = tree[lc].pcount;
360 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
361 sum[it->first] += it->second;
365 catch(exception& e) {
366 m->errorOut(e, "Tree", "mergeGcounts");
370 /**************************************************************************************************/
372 void Tree::randomLabels(vector<string> g) {
375 for(int i = 0; i < numLeaves; i++){
377 //get random index to switch with
378 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
380 //you only want to randomize the nodes that are from a group the user wants analyzed, so
381 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
384 treez = inUsersGroups(tree[z].getGroup(), g);
385 treei = inUsersGroups(tree[i].getGroup(), g);
387 if ((treez == true) && (treei == true)) {
388 //switches node i and node z's info.
389 map<string,int> lib_hold = tree[z].pGroups;
390 tree[z].pGroups = (tree[i].pGroups);
391 tree[i].pGroups = (lib_hold);
393 vector<string> zgroup = tree[z].getGroup();
394 tree[z].setGroup(tree[i].getGroup());
395 tree[i].setGroup(zgroup);
397 string zname = tree[z].getName();
398 tree[z].setName(tree[i].getName());
399 tree[i].setName(zname);
401 map<string,int> gcount_hold = tree[z].pcount;
402 tree[z].pcount = (tree[i].pcount);
403 tree[i].pcount = (gcount_hold);
407 catch(exception& e) {
408 m->errorOut(e, "Tree", "randomLabels");
412 /**************************************************************************************************
414 void Tree::randomLabels(string groupA, string groupB) {
416 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
417 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
419 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
420 for(int i=numSeqsA;i<randomGroups.size();i++){
421 randomGroups[i] = groupB;
423 random_shuffle(randomGroups.begin(), randomGroups.end());
425 int randomCounter = 0;
426 for(int i=0;i<numLeaves;i++){
427 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
428 tree[i].setGroup(randomGroups[randomCounter]);
429 tree[i].pcount.clear();
430 tree[i].pcount[randomGroups[randomCounter]] = 1;
431 tree[i].pGroups.clear();
432 tree[i].pGroups[randomGroups[randomCounter]] = 1;
437 catch(exception& e) {
438 m->errorOut(e, "Tree", "randomLabels");
442 /**************************************************************************************************/
443 void Tree::randomBlengths() {
445 for(int i=numNodes-1;i>=0;i--){
446 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
448 float bl_hold = tree[z].getBranchLength();
449 tree[z].setBranchLength(tree[i].getBranchLength());
450 tree[i].setBranchLength(bl_hold);
453 catch(exception& e) {
454 m->errorOut(e, "Tree", "randomBlengths");
458 /*************************************************************************************************/
459 void Tree::assembleRandomUnifracTree(vector<string> g) {
463 /*************************************************************************************************/
464 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
466 vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
471 /*************************************************************************************************/
472 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
473 void Tree::assembleRandomTree() {
477 /**************************************************************************************************/
479 void Tree::randomTopology() {
481 for(int i=0;i<numNodes;i++){
482 tree[i].setParent(-1);
484 for(int i=numLeaves;i<numNodes;i++){
485 tree[i].setChildren(-1, -1);
488 for(int i=numLeaves;i<numNodes;i++){
490 int rnd_index1, rnd_index2;
492 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
493 if(tree[rnd_index1].getParent() == -1){escape = 1;}
498 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
499 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
504 tree[i].setChildren(rnd_index1,rnd_index2);
505 tree[i].setParent(-1);
506 tree[rnd_index1].setParent(i);
507 tree[rnd_index2].setParent(i);
510 catch(exception& e) {
511 m->errorOut(e, "Tree", "randomTopology");
515 /*****************************************************************/
516 void Tree::print(ostream& out) {
518 int root = findRoot();
519 printBranch(root, out, "branch");
522 catch(exception& e) {
523 m->errorOut(e, "Tree", "print");
527 /*****************************************************************/
528 void Tree::printForBoot(ostream& out) {
530 int root = findRoot();
531 printBranch(root, out, "boot");
534 catch(exception& e) {
535 m->errorOut(e, "Tree", "printForBoot");
540 /*****************************************************************/
541 // This prints out the tree in Newick form.
542 void Tree::createNewickFile(string f) {
544 int root = findRoot();
545 //filename = getRootName(globaldata->getTreeFile()) + "newick";
548 openOutputFile(filename, out);
550 printBranch(root, out, "branch");
552 // you are at the end of the tree
556 catch(exception& e) {
557 m->errorOut(e, "Tree", "createNewickFile");
562 /*****************************************************************/
563 //This function finds the index of the root node.
565 int Tree::findRoot() {
567 for (int i = 0; i < numNodes; i++) {
569 if (tree[i].getParent() == -1) { return i; }
570 //cout << "i = " << i << endl;
571 //cout << "i's parent = " << tree[i].getParent() << endl;
575 catch(exception& e) {
576 m->errorOut(e, "Tree", "findRoot");
581 /*****************************************************************/
582 void Tree::printBranch(int node, ostream& out, string mode) {
585 // you are not a leaf
586 if (tree[node].getLChild() != -1) {
588 printBranch(tree[node].getLChild(), out, mode);
590 printBranch(tree[node].getRChild(), out, mode);
592 if (mode == "branch") {
593 //if there is a branch length then print it
594 if (tree[node].getBranchLength() != -1) {
595 out << ":" << tree[node].getBranchLength();
597 }else if (mode == "boot") {
598 //if there is a label then print it
599 if (tree[node].getLabel() != -1) {
600 out << tree[node].getLabel();
603 }else { //you are a leaf
604 string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
607 if (mode == "branch") {
608 //if there is a branch length then print it
609 if (tree[node].getBranchLength() != -1) {
610 out << ":" << tree[node].getBranchLength();
612 }else if (mode == "boot") {
613 //if there is a label then print it
614 if (tree[node].getLabel() != -1) {
615 out << tree[node].getLabel();
621 catch(exception& e) {
622 m->errorOut(e, "Tree", "printBranch");
627 /*****************************************************************/
629 void Tree::printTree() {
631 for(int i=0;i<numNodes;i++){
638 /*****************************************************************/
639 //this code is a mess and should be rethought...-slw
640 void Tree::parseTreeFile() {
642 //only takes names from the first tree and assumes that all trees use the same names.
644 string filename = globaldata->getTreeFile();
646 openInputFile(filename, filehandle);
651 //ifyou are not a nexus file
652 if((c = filehandle.peek()) != '#') {
653 while((c = filehandle.peek()) != ';') {
654 while ((c = filehandle.peek()) != ';') {
662 if((c == '(') && (comment != 1)){ break; }
666 done = readTreeString(filehandle);
667 if (done == 0) { break; }
669 //ifyou are a nexus file
670 }else if((c = filehandle.peek()) == '#') {
674 while(holder != "translate" && holder != "Translate"){
675 if(holder == "[" || holder == "[!"){
681 filehandle >> holder;
683 //if there is no translate then you must read tree string otherwise use translate to get names
684 if((holder == "tree") && (comment != 1)){
685 //pass over the "tree rep.6878900 = "
686 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
688 if(c == EOF) { break; }
689 filehandle.putback(c); //put back first ( of tree.
690 done = readTreeString(filehandle);
695 if (done == 0) { break; }
698 //use nexus translation rather than parsing tree to save time
699 if((holder == "translate") || (holder == "Translate")) {
701 string number, name, h;
702 h = ""; // so it enters the loop the first time
703 while((h != ";") && (number != ";")) {
704 filehandle >> number;
707 //c = , until done with translation then c = ;
708 h = name.substr(name.length()-1, name.length());
709 name.erase(name.end()-1); //erase the comma
710 globaldata->Treenames.push_back(number);
712 if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
717 //for (int i = 0; i < globaldata->Treenames.size(); i++) {
718 //cout << globaldata->Treenames[i] << endl; }
719 //cout << globaldata->Treenames.size() << endl;
721 catch(exception& e) {
722 m->errorOut(e, "Tree", "parseTreeFile");
726 /*******************************************************/
728 /*******************************************************/
729 int Tree::readTreeString(ifstream& filehandle) {
734 while((c = filehandle.peek()) != ';') {
736 //cout << " at beginning of while " << k << endl;
738 //to pass over labels in trees
740 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
741 filehandle.putback(c);
743 if(c == ';') { return 0; }
744 if(c == -1) { return 0; }
746 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
748 c = filehandle.get();
751 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
753 c = filehandle.get();
755 //cout << " in name while " << k << endl;
758 //cout << "name = " << name << endl;
759 globaldata->Treenames.push_back(name);
760 filehandle.putback(c);
762 //cout << " after putback" << k << endl;
765 if(c == ':') { //read until you reach the end of the branch length
766 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
767 c = filehandle.get();
769 //cout << " in branch while " << k << endl;
771 filehandle.putback(c);
774 c = filehandle.get();
776 //cout << " here after get " << k << endl;
777 if(c == ';') { return 0; }
778 if(c == ')') { filehandle.putback(c); }
785 catch(exception& e) {
786 m->errorOut(e, "Tree", "readTreeString");
791 /*******************************************************/
793 /*******************************************************/