5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 /*****************************************************************/
16 globaldata = GlobalData::getInstance();
18 if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; }
19 //for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
20 numLeaves = globaldata->Treenames.size();
21 numNodes = 2*numLeaves - 1;
23 tree.resize(numNodes);
25 //initialize tree with correct number of nodes, name and group info.
26 for (int i = 0; i < numNodes; i++) {
27 //initialize leaf nodes
28 if (i <= (numLeaves-1)) {
29 tree[i].setName(globaldata->Treenames[i]);
30 vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
31 tree[i].setGroup(tempGroups);
32 //set pcount and pGroup for groupname to 1.
33 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
34 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
35 //Treemap knows name, group and index to speed up search
36 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
38 //intialize non leaf nodes
39 }else if (i > (numLeaves-1)) {
41 vector<string> tempGroups;
42 tree[i].setGroup(tempGroups);
47 errorOut(e, "Tree", "Tree");
52 /*****************************************************************/
54 /*****************************************************************/
55 void Tree::addNamesToCounts() {
57 //ex. seq1 seq2,seq3,se4
63 //before this function seq1.pcount = pasture -> 1
64 //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
66 //before this function seq1.pgroups = pasture -> 1
67 //after seq1.pgroups = pasture -> 1 since that is the dominant group
70 //go through each leaf and update its pcounts and pgroups
71 for (int i = 0; i < numLeaves; i++) {
72 string name = tree[i].getName();
74 map<string, string>::iterator itNames = globaldata->names.find(name);
76 if (itNames == globaldata->names.end()) { mothurOut(name + " is not in your name file, please correct."); mothurOutEndLine(); exit(1); }
78 vector<string> dupNames;
79 splitAtComma(globaldata->names[name], dupNames);
81 map<string, int>::iterator itCounts;
83 for (int j = 0; j < dupNames.size(); j++) {
85 if (dupNames[j] != name) {//you already added yourself in the constructor
86 string group = globaldata->gTreemap->getGroup(dupNames[j]);
89 itCounts = tree[i].pcount.find(group);
90 if (itCounts == tree[i].pcount.end()) { //new group, add it
91 tree[i].pcount[group] = 1;
93 tree[i].pcount[group]++;
97 itCounts = tree[i].pGroups.find(group);
98 if (itCounts == tree[i].pGroups.end()) { //new group, add it
99 tree[i].pGroups[group] = 1;
101 tree[i].pGroups[group]++;
105 if(tree[i].pGroups[group] > maxPars){
106 maxPars = tree[i].pGroups[group];
111 if (maxPars > 1) { //then we have some more dominant groups
112 //erase all the groups that are less than maxPars because you found a more dominant group.
113 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
114 if(it->second < maxPars){
115 tree[i].pGroups.erase(it++);
118 //set one remaining groups to 1
119 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
120 tree[i].pGroups[it->first] = 1;
124 //update groups to reflect all the groups this node represents
125 vector<string> nodeGroups;
126 map<string, int>::iterator itGroups;
127 for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
128 nodeGroups.push_back(itGroups->first);
130 tree[i].setGroup(nodeGroups);
136 catch(exception& e) {
137 errorOut(e, "Tree", "addNamesToCounts");
141 /*****************************************************************/
142 int Tree::getIndex(string searchName) {
144 //Treemap knows name, group and index to speed up search
145 // getIndex function will return the vector index or -1 if seq is not found.
146 int index = globaldata->gTreemap->getIndex(searchName);
150 catch(exception& e) {
151 errorOut(e, "Tree", "getIndex");
155 /*****************************************************************/
157 void Tree::setIndex(string searchName, int index) {
159 //set index in treemap
160 globaldata->gTreemap->setIndex(searchName, index);
162 catch(exception& e) {
163 errorOut(e, "Tree", "setIndex");
167 /*****************************************************************/
168 void Tree::assembleTree() {
171 //if user has given a names file we want to include that info in the pgroups and pcount info.
172 if(globaldata->names.size() != 0) { addNamesToCounts(); }
174 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
175 for (int i = numLeaves; i < numNodes; i++) {
176 tree[i].pGroups = (mergeGroups(i));
177 tree[i].pcount = (mergeGcounts(i));
180 catch(exception& e) {
181 errorOut(e, "Tree", "assembleTree");
185 /*****************************************************************/
186 void Tree::getCopy(Tree* copy) {
189 //for each node in the tree copy its info
190 for (int i = 0; i < numNodes; i++) {
192 tree[i].setName(copy->tree[i].getName());
195 tree[i].setGroup(copy->tree[i].getGroup());
198 tree[i].setBranchLength(copy->tree[i].getBranchLength());
201 tree[i].setParent(copy->tree[i].getParent());
204 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
206 //copy index in node and tmap
207 tree[i].setIndex(copy->tree[i].getIndex());
208 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
211 tree[i].pGroups = copy->tree[i].pGroups;
214 tree[i].pcount = copy->tree[i].pcount;
218 catch(exception& e) {
219 errorOut(e, "Tree", "getCopy");
223 /*****************************************************************/
224 //returns a map with a groupname and the number of times that group was seen in the children
225 //for instance if your children are white and black then it would return a map with 2 entries
226 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
227 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
229 map<string, int> Tree::mergeGroups(int i) {
231 int lc = tree[i].getLChild();
232 int rc = tree[i].getRChild();
234 //set parsimony groups to left child
235 map<string,int> parsimony = tree[lc].pGroups;
239 //look at right child groups and update maxPars if right child has something higher for that group.
240 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
241 it2 = parsimony.find(it->first);
242 if (it2 != parsimony.end()) {
243 parsimony[it->first]++;
245 parsimony[it->first] = 1;
248 if(parsimony[it->first] > maxPars){
249 maxPars = parsimony[it->first];
253 // this is true if right child had a greater parsimony for a certain group
255 //erase all the groups that are only 1 because you found something with 2.
256 for(it=parsimony.begin();it!=parsimony.end();){
258 parsimony.erase(it++);
261 //set one remaining groups to 1
262 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
263 for(it=parsimony.begin();it!=parsimony.end();it++){
264 parsimony[it->first] = 1;
271 catch(exception& e) {
272 errorOut(e, "Tree", "mergeGroups");
276 /*****************************************************************/
277 //returns a map with a groupname and the number of times that group was seen in the children
278 //for instance if your children are white and black then it would return a map with 2 entries
279 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
280 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
282 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
285 int lc = tree[i].getLChild();
286 int rc = tree[i].getRChild();
288 //loop through nodes groups removing the ones the user doesn't want
289 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
290 if (inUsersGroups(it->first, g) != true) {
291 tree[lc].pGroups.erase(it++);
295 //loop through nodes groups removing the ones the user doesn't want
296 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
297 if (inUsersGroups(it->first, g) != true) {
298 tree[rc].pGroups.erase(it++);
302 //set parsimony groups to left child
303 map<string,int> parsimony = tree[lc].pGroups;
307 //look at right child groups and update maxPars if right child has something higher for that group.
308 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
309 it2 = parsimony.find(it->first);
310 if (it2 != parsimony.end()) {
311 parsimony[it->first]++;
313 parsimony[it->first] = 1;
316 if(parsimony[it->first] > maxPars){
317 maxPars = parsimony[it->first];
321 // this is true if right child had a greater parsimony for a certain group
323 //erase all the groups that are only 1 because you found something with 2.
324 for(it=parsimony.begin();it!=parsimony.end();){
326 parsimony.erase(it++);
330 for(it=parsimony.begin();it!=parsimony.end();it++){
331 parsimony[it->first] = 1;
337 catch(exception& e) {
338 errorOut(e, "Tree", "mergeUserGroups");
344 /**************************************************************************************************/
346 map<string,int> Tree::mergeGcounts(int position) {
348 map<string,int>::iterator pos;
350 int lc = tree[position].getLChild();
351 int rc = tree[position].getRChild();
353 map<string,int> sum = tree[lc].pcount;
355 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
356 sum[it->first] += it->second;
360 catch(exception& e) {
361 errorOut(e, "Tree", "mergeGcounts");
365 /**************************************************************************************************/
367 void Tree::randomLabels(vector<string> g) {
370 for(int i = 0; i < numLeaves; i++){
372 //get random index to switch with
373 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
375 //you only want to randomize the nodes that are from a group the user wants analyzed, so
376 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
379 treez = inUsersGroups(tree[z].getGroup(), g);
380 treei = inUsersGroups(tree[i].getGroup(), g);
382 if ((treez == true) && (treei == true)) {
383 //switches node i and node z's info.
384 map<string,int> lib_hold = tree[z].pGroups;
385 tree[z].pGroups = (tree[i].pGroups);
386 tree[i].pGroups = (lib_hold);
388 vector<string> zgroup = tree[z].getGroup();
389 tree[z].setGroup(tree[i].getGroup());
390 tree[i].setGroup(zgroup);
392 string zname = tree[z].getName();
393 tree[z].setName(tree[i].getName());
394 tree[i].setName(zname);
396 map<string,int> gcount_hold = tree[z].pcount;
397 tree[z].pcount = (tree[i].pcount);
398 tree[i].pcount = (gcount_hold);
402 catch(exception& e) {
403 errorOut(e, "Tree", "randomLabels");
407 /**************************************************************************************************
409 void Tree::randomLabels(string groupA, string groupB) {
411 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
412 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
414 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
415 for(int i=numSeqsA;i<randomGroups.size();i++){
416 randomGroups[i] = groupB;
418 random_shuffle(randomGroups.begin(), randomGroups.end());
420 int randomCounter = 0;
421 for(int i=0;i<numLeaves;i++){
422 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
423 tree[i].setGroup(randomGroups[randomCounter]);
424 tree[i].pcount.clear();
425 tree[i].pcount[randomGroups[randomCounter]] = 1;
426 tree[i].pGroups.clear();
427 tree[i].pGroups[randomGroups[randomCounter]] = 1;
432 catch(exception& e) {
433 errorOut(e, "Tree", "randomLabels");
437 /**************************************************************************************************/
438 void Tree::randomBlengths() {
440 for(int i=numNodes-1;i>=0;i--){
441 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
443 float bl_hold = tree[z].getBranchLength();
444 tree[z].setBranchLength(tree[i].getBranchLength());
445 tree[i].setBranchLength(bl_hold);
448 catch(exception& e) {
449 errorOut(e, "Tree", "randomBlengths");
453 /*************************************************************************************************/
454 void Tree::assembleRandomUnifracTree(vector<string> g) {
458 /*************************************************************************************************/
459 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
461 vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
466 /*************************************************************************************************/
467 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
468 void Tree::assembleRandomTree() {
472 /**************************************************************************************************/
474 void Tree::randomTopology() {
476 for(int i=0;i<numNodes;i++){
477 tree[i].setParent(-1);
479 for(int i=numLeaves;i<numNodes;i++){
480 tree[i].setChildren(-1, -1);
483 for(int i=numLeaves;i<numNodes;i++){
485 int rnd_index1, rnd_index2;
487 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
488 if(tree[rnd_index1].getParent() == -1){escape = 1;}
493 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
494 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
499 tree[i].setChildren(rnd_index1,rnd_index2);
500 tree[i].setParent(-1);
501 tree[rnd_index1].setParent(i);
502 tree[rnd_index2].setParent(i);
505 catch(exception& e) {
506 errorOut(e, "Tree", "randomTopology");
510 /*****************************************************************/
511 void Tree::print(ostream& out) {
513 int root = findRoot();
514 printBranch(root, out, "branch");
517 catch(exception& e) {
518 errorOut(e, "Tree", "print");
522 /*****************************************************************/
523 void Tree::printForBoot(ostream& out) {
525 int root = findRoot();
526 printBranch(root, out, "boot");
529 catch(exception& e) {
530 errorOut(e, "Tree", "printForBoot");
535 /*****************************************************************/
536 // This prints out the tree in Newick form.
537 void Tree::createNewickFile(string f) {
539 int root = findRoot();
540 //filename = getRootName(globaldata->getTreeFile()) + "newick";
543 openOutputFile(filename, out);
545 printBranch(root, out, "branch");
547 // you are at the end of the tree
551 catch(exception& e) {
552 errorOut(e, "Tree", "createNewickFile");
557 /*****************************************************************/
558 //This function finds the index of the root node.
560 int Tree::findRoot() {
562 for (int i = 0; i < numNodes; i++) {
564 if (tree[i].getParent() == -1) { return i; }
565 //cout << "i = " << i << endl;
566 //cout << "i's parent = " << tree[i].getParent() << endl;
570 catch(exception& e) {
571 errorOut(e, "Tree", "findRoot");
576 /*****************************************************************/
577 void Tree::printBranch(int node, ostream& out, string mode) {
580 // you are not a leaf
581 if (tree[node].getLChild() != -1) {
583 printBranch(tree[node].getLChild(), out, mode);
585 printBranch(tree[node].getRChild(), out, mode);
587 if (mode == "branch") {
588 //if there is a branch length then print it
589 if (tree[node].getBranchLength() != -1) {
590 out << ":" << tree[node].getBranchLength();
592 }else if (mode == "boot") {
593 //if there is a label then print it
594 if (tree[node].getLabel() != -1) {
595 out << tree[node].getLabel();
598 }else { //you are a leaf
599 string leafGroup = globaldata->gTreemap->getGroup(tree[node].getName());
602 if (mode == "branch") {
603 //if there is a branch length then print it
604 if (tree[node].getBranchLength() != -1) {
605 out << ":" << tree[node].getBranchLength();
607 }else if (mode == "boot") {
608 //if there is a label then print it
609 if (tree[node].getLabel() != -1) {
610 out << tree[node].getLabel();
616 catch(exception& e) {
617 errorOut(e, "Tree", "printBranch");
622 /*****************************************************************/
624 void Tree::printTree() {
626 for(int i=0;i<numNodes;i++){
633 /*****************************************************************/
634 //this code is a mess and should be rethought...-slw
635 void Tree::parseTreeFile() {
637 //only takes names from the first tree and assumes that all trees use the same names.
639 string filename = globaldata->getTreeFile();
641 openInputFile(filename, filehandle);
646 //ifyou are not a nexus file
647 if((c = filehandle.peek()) != '#') {
648 while((c = filehandle.peek()) != ';') {
649 while ((c = filehandle.peek()) != ';') {
657 if((c == '(') && (comment != 1)){ break; }
661 done = readTreeString(filehandle);
662 if (done == 0) { break; }
664 //ifyou are a nexus file
665 }else if((c = filehandle.peek()) == '#') {
669 while(holder != "translate" && holder != "Translate"){
670 if(holder == "[" || holder == "[!"){
676 filehandle >> holder;
678 //if there is no translate then you must read tree string otherwise use translate to get names
679 if((holder == "tree") && (comment != 1)){
680 //pass over the "tree rep.6878900 = "
681 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
683 if(c == EOF) { break; }
684 filehandle.putback(c); //put back first ( of tree.
685 done = readTreeString(filehandle);
690 if (done == 0) { break; }
693 //use nexus translation rather than parsing tree to save time
694 if((holder == "translate") || (holder == "Translate")) {
696 string number, name, h;
697 h = ""; // so it enters the loop the first time
698 while((h != ";") && (number != ";")) {
699 filehandle >> number;
702 //c = , until done with translation then c = ;
703 h = name.substr(name.length()-1, name.length());
704 name.erase(name.end()-1); //erase the comma
705 globaldata->Treenames.push_back(number);
707 if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
712 catch(exception& e) {
713 errorOut(e, "Tree", "parseTreeFile");
717 /*******************************************************/
719 /*******************************************************/
720 int Tree::readTreeString(ifstream& filehandle) {
725 while((c = filehandle.peek()) != ';') {
727 //cout << " at beginning of while " << k << endl;
729 //to pass over labels in trees
731 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
732 filehandle.putback(c);
734 if(c == ';') { return 0; }
735 if(c == -1) { return 0; }
737 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
739 c = filehandle.get();
742 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
744 c = filehandle.get();
746 //cout << " in name while " << k << endl;
749 //cout << "name = " << name << endl;
750 globaldata->Treenames.push_back(name);
751 filehandle.putback(c);
753 //cout << " after putback" << k << endl;
756 if(c == ':') { //read until you reach the end of the branch length
757 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
758 c = filehandle.get();
760 //cout << " in branch while " << k << endl;
762 filehandle.putback(c);
765 c = filehandle.get();
767 //cout << " here after get " << k << endl;
768 if(c == ';') { return 0; }
769 if(c == ')') { filehandle.putback(c); }
776 catch(exception& e) {
777 errorOut(e, "Tree", "readTreeString");
782 /*******************************************************/
784 /*******************************************************/