5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 /*****************************************************************/
16 globaldata = GlobalData::getInstance();
18 if (globaldata->runParse == true) { parseTreeFile(); globaldata->runParse = false; }
20 numLeaves = globaldata->Treenames.size();
21 numNodes = 2*numLeaves - 1;
23 tree.resize(numNodes);
25 //initialize tree with correct number of nodes, name and group info.
26 for (int i = 0; i < numNodes; i++) {
27 //initialize leaf nodes
28 if (i <= (numLeaves-1)) {
29 tree[i].setName(globaldata->Treenames[i]);
30 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
31 //set pcount and pGroup for groupname to 1.
32 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
33 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
34 //Treemap knows name, group and index to speed up search
35 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
37 //intialize non leaf nodes
38 }else if (i > (numLeaves-1)) {
45 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
54 /*****************************************************************/
56 /*****************************************************************/
57 int Tree::getIndex(string searchName) {
59 //Treemap knows name, group and index to speed up search
60 // getIndex function will return the vector index or -1 if seq is not found.
61 int index = globaldata->gTreemap->getIndex(searchName);
66 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
74 /*****************************************************************/
76 void Tree::setIndex(string searchName, int index) {
78 //set index in treemap
79 globaldata->gTreemap->setIndex(searchName, index);
82 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
86 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
90 /*****************************************************************/
91 void Tree::assembleTree() {
93 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
94 for (int i = numLeaves; i < numNodes; i++) {
95 tree[i].pGroups = (mergeGroups(i));
96 tree[i].pcount = (mergeGcounts(i));
100 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
104 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
108 /*****************************************************************/
109 void Tree::getCopy(Tree* copy) {
112 //for each node in the tree copy its info
113 for (int i = 0; i < numNodes; i++) {
115 tree[i].setName(copy->tree[i].getName());
118 tree[i].setGroup(copy->tree[i].getGroup());
121 tree[i].setBranchLength(copy->tree[i].getBranchLength());
124 tree[i].setParent(copy->tree[i].getParent());
127 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
129 //copy index in node and tmap
130 tree[i].setIndex(copy->tree[i].getIndex());
131 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
134 tree[i].pGroups = copy->tree[i].pGroups;
137 tree[i].pcount = copy->tree[i].pcount;
140 catch(exception& e) {
141 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
145 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
149 /*****************************************************************/
150 //returns a map with a groupname and the number of times that group was seen in the children
151 //for instance if your children are white and black then it would return a map with 2 entries
152 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
153 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
155 map<string, int> Tree::mergeGroups(int i) {
157 int lc = tree[i].getLChild();
158 int rc = tree[i].getRChild();
160 //set parsimony groups to left child
161 map<string,int> parsimony = tree[lc].pGroups;
165 //look at right child groups and update maxPars if right child has something higher for that group.
166 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
167 it2 = parsimony.find(it->first);
168 if (it2 != parsimony.end()) {
169 parsimony[it->first]++;
171 parsimony[it->first] = 1;
174 if(parsimony[it->first] > maxPars){
175 maxPars = parsimony[it->first];
179 // this is true if right child had a greater parsimony for a certain group
181 //erase all the groups that are only 1 because you found something with 2.
182 for(it=parsimony.begin();it!=parsimony.end();it++){
184 parsimony.erase(it->first);
188 //set one remaining groups to 1
189 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
190 for(it=parsimony.begin();it!=parsimony.end();it++){
191 parsimony[it->first] = 1;
198 catch(exception& e) {
199 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
203 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
207 /*****************************************************************/
208 //returns a map with a groupname and the number of times that group was seen in the children
209 //for instance if your children are white and black then it would return a map with 2 entries
210 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
211 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
213 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
216 int lc = tree[i].getLChild();
217 int rc = tree[i].getRChild();
219 //loop through nodes groups removing the ones the user doesn't want
220 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
221 if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
224 //loop through nodes groups removing the ones the user doesn't want
225 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
226 if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
229 //set parsimony groups to left child
230 map<string,int> parsimony = tree[lc].pGroups;
234 //look at right child groups and update maxPars if right child has something higher for that group.
235 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
236 it2 = parsimony.find(it->first);
237 if (it2 != parsimony.end()) {
238 parsimony[it->first]++;
240 parsimony[it->first] = 1;
243 if(parsimony[it->first] > maxPars){
244 maxPars = parsimony[it->first];
248 // this is true if right child had a greater parsimony for a certain group
250 //erase all the groups that are only 1 because you found something with 2.
251 for(it=parsimony.begin();it!=parsimony.end();it++){
253 parsimony.erase(it->first);
256 for(it=parsimony.begin();it!=parsimony.end();it++){
257 parsimony[it->first] = 1;
263 catch(exception& e) {
264 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
268 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
274 /**************************************************************************************************/
276 map<string,int> Tree::mergeGcounts(int position) {
278 map<string,int>::iterator pos;
280 int lc = tree[position].getLChild();
281 int rc = tree[position].getRChild();
283 map<string,int> sum = tree[lc].pcount;
285 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
286 sum[it->first] += it->second;
290 catch(exception& e) {
291 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
295 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
299 /**************************************************************************************************/
301 void Tree::randomLabels(vector<string> g) {
304 for(int i = 0; i < numLeaves; i++){
306 //get random index to switch with
307 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
309 //you only want to randomize the nodes that are from a group the user wants analyzed, so
310 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
313 treez = inUsersGroups(tree[z].getGroup(), g);
314 treei = inUsersGroups(tree[i].getGroup(), g);
316 if ((treez == true) && (treei == true)) {
317 //switches node i and node z's info.
318 map<string,int> lib_hold = tree[z].pGroups;
319 tree[z].pGroups = (tree[i].pGroups);
320 tree[i].pGroups = (lib_hold);
322 string zgroup = tree[z].getGroup();
323 tree[z].setGroup(tree[i].getGroup());
324 tree[i].setGroup(zgroup);
326 string zname = tree[z].getName();
327 tree[z].setName(tree[i].getName());
328 tree[i].setName(zname);
330 map<string,int> gcount_hold = tree[z].pcount;
331 tree[z].pcount = (tree[i].pcount);
332 tree[i].pcount = (gcount_hold);
336 catch(exception& e) {
337 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
341 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345 /**************************************************************************************************/
347 void Tree::randomLabels(string groupA, string groupB) {
349 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
350 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
352 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
353 for(int i=numSeqsA;i<randomGroups.size();i++){
354 randomGroups[i] = groupB;
356 random_shuffle(randomGroups.begin(), randomGroups.end());
358 int randomCounter = 0;
359 for(int i=0;i<numLeaves;i++){
360 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
361 tree[i].setGroup(randomGroups[randomCounter]);
362 tree[i].pcount.clear();
363 tree[i].pcount[randomGroups[randomCounter]] = 1;
364 tree[i].pGroups.clear();
365 tree[i].pGroups[randomGroups[randomCounter]] = 1;
370 catch(exception& e) {
371 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
375 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
379 /**************************************************************************************************/
380 void Tree::randomBlengths() {
382 for(int i=numNodes-1;i>=0;i--){
383 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
385 float bl_hold = tree[z].getBranchLength();
386 tree[z].setBranchLength(tree[i].getBranchLength());
387 tree[i].setBranchLength(bl_hold);
390 catch(exception& e) {
391 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
395 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
399 /*************************************************************************************************/
400 void Tree::assembleRandomUnifracTree(vector<string> g) {
404 /*************************************************************************************************/
405 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
406 randomLabels(groupA, groupB);
410 /*************************************************************************************************/
411 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
412 void Tree::assembleRandomTree() {
416 /**************************************************************************************************/
418 void Tree::randomTopology() {
420 for(int i=0;i<numNodes;i++){
421 tree[i].setParent(-1);
423 for(int i=numLeaves;i<numNodes;i++){
424 tree[i].setChildren(-1, -1);
427 for(int i=numLeaves;i<numNodes;i++){
429 int rnd_index1, rnd_index2;
431 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
432 if(tree[rnd_index1].getParent() == -1){escape = 1;}
437 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
438 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
443 tree[i].setChildren(rnd_index1,rnd_index2);
444 tree[i].setParent(-1);
445 tree[rnd_index1].setParent(i);
446 tree[rnd_index2].setParent(i);
449 catch(exception& e) {
450 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
454 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
458 /*****************************************************************/
459 void Tree::print(ostream& out) {
461 int root = findRoot();
462 printBranch(root, out, "branch");
465 catch(exception& e) {
466 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
470 cout << "An unknown error has occurred in the Tree class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
474 /*****************************************************************/
475 void Tree::printForBoot(ostream& out) {
477 int root = findRoot();
478 printBranch(root, out, "boot");
481 catch(exception& e) {
482 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
486 cout << "An unknown error has occurred in the Tree class function printForBoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
491 /*****************************************************************/
492 // This prints out the tree in Newick form.
493 void Tree::createNewickFile(string f) {
495 int root = findRoot();
496 //filename = getRootName(globaldata->getTreeFile()) + "newick";
498 openOutputFile(filename, out);
500 printBranch(root, out, "branch");
502 // you are at the end of the tree
506 catch(exception& e) {
507 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
511 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
516 /*****************************************************************/
517 //This function finds the index of the root node.
519 int Tree::findRoot() {
521 for (int i = 0; i < numNodes; i++) {
523 if (tree[i].getParent() == -1) { return i; }
524 //cout << "i = " << i << endl;
525 //cout << "i's parent = " << tree[i].getParent() << endl;
529 catch(exception& e) {
530 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
534 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
539 /*****************************************************************/
540 void Tree::printBranch(int node, ostream& out, string mode) {
543 // you are not a leaf
544 if (tree[node].getLChild() != -1) {
546 printBranch(tree[node].getLChild(), out, mode);
548 printBranch(tree[node].getRChild(), out, mode);
550 if (mode == "branch") {
551 //if there is a branch length then print it
552 if (tree[node].getBranchLength() != -1) {
553 out << ":" << tree[node].getBranchLength();
555 }else if (mode == "boot") {
556 //if there is a label then print it
557 if (tree[node].getLabel() != -1) {
558 out << tree[node].getLabel();
561 }else { //you are a leaf
562 out << tree[node].getGroup();
563 if (mode == "branch") {
564 //if there is a branch length then print it
565 if (tree[node].getBranchLength() != -1) {
566 out << ":" << tree[node].getBranchLength();
568 }else if (mode == "boot") {
569 //if there is a label then print it
570 if (tree[node].getLabel() != -1) {
571 out << tree[node].getLabel();
577 catch(exception& e) {
578 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
582 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
587 /*****************************************************************/
589 void Tree::printTree() {
591 for(int i=0;i<numNodes;i++){
598 /*****************************************************************/
600 void Tree::parseTreeFile() {
602 //only takes names from the first tree and assumes that all trees use the same names.
604 string filename = globaldata->getTreeFile();
606 openInputFile(filename, filehandle);
610 //ifyou are not a nexus file
611 if((c = filehandle.peek()) != '#') {
612 while((c = filehandle.peek()) != ';') {
613 while ((c = filehandle.peek()) != ';') {
621 if((c == '(') && (comment != 1)){ break; }
625 readTreeString(filehandle);
627 //ifyou are a nexus file
628 }else if((c = filehandle.peek()) == '#') {
632 while(holder != "translate" && holder != "Translate"){
633 if(holder == "[" || holder == "[!"){
639 filehandle >> holder;
641 //ifthere is no translate then you must read tree string otherwise use translate to get names
642 if(holder == "tree" && comment != 1){
643 //pass over the "tree rep.6878900 = "
644 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
646 if(c == EOF) { break; }
647 filehandle.putback(c); //put back first ( of tree.
648 readTreeString(filehandle);
653 //use nexus translation rather than parsing tree to save time
654 if((holder == "translate") || (holder == "Translate")) {
656 string number, name, h;
657 h = ""; // so it enters the loop the first time
658 while((h != ";") && (number != ";")) {
659 filehandle >> number;
662 //c = , until done with translation then c = ;
663 h = name.substr(name.length()-1, name.length());
664 name.erase(name.end()-1); //erase the comma
665 globaldata->Treenames.push_back(number);
667 if(number == ";") { globaldata->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
672 catch(exception& e) {
673 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
677 cout << "An unknown error has occurred in the Tree class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
681 /*******************************************************/
683 /*******************************************************/
684 void Tree::readTreeString(ifstream& filehandle) {
689 while((c = filehandle.peek()) != ';') {
691 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
693 c = filehandle.get();
696 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
698 c = filehandle.get();
700 //cout << " in name while " << k << endl;
703 //cout << "name = " << name << endl;
704 globaldata->Treenames.push_back(name);
705 filehandle.putback(c);
707 //cout << " after putback" << k << endl;
710 if(c == ':') { //read until you reach the end of the branch length
711 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
712 c = filehandle.get();
714 //cout << " in branch while " << k << endl;
716 filehandle.putback(c);
718 c = filehandle.get();
719 if(c == ';') { break; }
725 catch(exception& e) {
726 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
730 cout << "An unknown error has occurred in the Tree class function parseTreeFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
735 /*******************************************************/
737 /*******************************************************/