5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /*****************************************************************/
// Constructor taking a leaf count and a TreeMap; the TreeMap pointer is stored in tmap.
// Sizes the node vector to 2*numLeaves - 1 entries.
// NOTE(review): the embedded line numbers skip (13 -> 15 -> 18), so the statement that sets
// numLeaves (presumably from num) is not visible in this view -- confirm against the full file.
13 Tree::Tree(int num, TreeMap* t) : tmap(t) {
15 m = MothurOut::getInstance();
18 numNodes = 2*numLeaves - 1;
20 tree.resize(numNodes);
// A caught exception is reported through m->errorOut (the catch header itself is not visible here).
23 m->errorOut(e, "Tree", "Tree - numNodes");
27 /*****************************************************************/
// Constructor taking a string. Per the original note it exists only so the tree names can be extracted.
// It parses the tree file unconditionally and then clears m->runParse.
// NOTE(review): parameter g is not used on any line visible in this view.
28 Tree::Tree(string g) { //do not use tree generated by this its just to extract the treenames, its a chicken before the egg thing that needs to be revisited.
30 m = MothurOut::getInstance();
31 parseTreeFile(); m->runParse = false;
// A caught exception is reported through m->errorOut (the catch header itself is not visible here).
34 m->errorOut(e, "Tree", "Tree - just parse");
38 /*****************************************************************/
// Constructor that allocates and labels the nodes of a tree whose leaves are m->Treenames.
// The tree file is parsed only when m->runParse is set. numLeaves comes from m->Treenames and
// 2*numLeaves - 1 nodes are allocated. Leaves (indices 0..numLeaves-1) receive their name, their
// group from tmap, and a count of 1 for that group in pcount and pGroups; all other nodes get an
// empty group list. No parent/child links are set on the lines visible here.
39 Tree::Tree(TreeMap* t) : tmap(t) {
41 m = MothurOut::getInstance();
43 if (m->runParse == true) { parseTreeFile(); m->runParse = false; }
44 //for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
45 numLeaves = m->Treenames.size();
46 numNodes = 2*numLeaves - 1;
48 tree.resize(numNodes);
50 //initialize groupNodeInfo
// every group name known to tmap gets an (empty) entry in groupNodeInfo
51 for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
52 groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
55 //initialize tree with correct number of nodes, name and group info.
56 for (int i = 0; i < numNodes; i++) {
57 //initialize leaf nodes
58 if (i <= (numLeaves-1)) {
59 tree[i].setName(m->Treenames[i]);
62 string group = tmap->getGroup(m->Treenames[i]);
// a leaf starts out belonging to exactly one group
64 vector<string> tempGroups; tempGroups.push_back(group);
65 tree[i].setGroup(tempGroups);
// remember that leaf i belongs to this group
66 groupNodeInfo[group].push_back(i);
68 //set pcount and pGroup for groupname to 1.
69 tree[i].pcount[group] = 1;
70 tree[i].pGroups[group] = 1;
72 //Treemap knows name, group and index to speed up search
73 tmap->setIndex(m->Treenames[i], i);
75 //intialize non leaf nodes
76 }else if (i > (numLeaves-1)) {
// internal nodes start with an empty group list
78 vector<string> tempGroups;
79 tree[i].setGroup(tempGroups);
// A caught exception is reported through m->errorOut (the catch header itself is not visible here).
85 m->errorOut(e, "Tree", "Tree");
89 /*****************************************************************/
// Constructor that labels the nodes exactly like the Tree(TreeMap*) constructor and then builds
// the topology from a similarity matrix: numLeaves - 1 times it picks the largest value in the
// lower triangle of sims, joins the two corresponding clusters under a new internal node, and
// replaces the two matrix rows with their average.
// sims is taken by non-const reference and is overwritten during the merges (averages and -1000.0 markers).
90 Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
92 m = MothurOut::getInstance();
94 if (m->runParse == true) { parseTreeFile(); m->runParse = false; }
95 numLeaves = m->Treenames.size();
96 numNodes = 2*numLeaves - 1;
98 tree.resize(numNodes);
100 //initialize groupNodeInfo
101 for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
102 groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
105 //initialize tree with correct number of nodes, name and group info.
106 for (int i = 0; i < numNodes; i++) {
107 //initialize leaf nodes
108 if (i <= (numLeaves-1)) {
109 tree[i].setName(m->Treenames[i]);
112 string group = tmap->getGroup(m->Treenames[i]);
114 vector<string> tempGroups; tempGroups.push_back(group);
115 tree[i].setGroup(tempGroups);
116 groupNodeInfo[group].push_back(i);
118 //set pcount and pGroup for groupname to 1.
119 tree[i].pcount[group] = 1;
120 tree[i].pGroups[group] = 1;
122 //Treemap knows name, group and index to speed up search
123 tmap->setIndex(m->Treenames[i], i);
125 //intialize non leaf nodes
126 }else if (i > (numLeaves-1)) {
128 vector<string> tempGroups;
129 tree[i].setGroup(tempGroups);
133 //build tree from matrix
135 map<int, int> indexes; //maps row in simMatrix to vector index in the tree
// initially matrix row g is leaf g
136 for (int g = 0; g < numLeaves; g++) { indexes[g] = g; }
138 //do merges and create tree structure by setting parents and children
139 //there are numGroups - 1 merges to do
140 for (int i = 0; i < (numLeaves - 1); i++) {
// -1000.0 is also the value written into sims below to mark entries that are no longer candidates
141 float largest = -1000.0;
// stop merging if the user interrupted the run
143 if (m->control_pressed) { break; }
146 //find largest value in sims matrix by searching lower triangle
// NOTE(review): row and column are declared on a line not visible in this view
147 for (int j = 1; j < sims.size(); j++) {
148 for (int k = 0; k < j; k++) {
149 if (sims[j][k] > largest) { largest = sims[j][k]; row = j; column = k; }
153 //set non-leaf node info and update leaves to know their parents
// the i-th merge is stored in internal node numLeaves + i
155 tree[numLeaves + i].setChildren(indexes[row], indexes[column]);
158 tree[indexes[row]].setParent(numLeaves + i);
159 tree[indexes[column]].setParent(numLeaves + i);
161 //blength = distance / 2;
// the similarity is turned into a distance as (1 - largest) before halving
162 float blength = ((1.0 - largest) / 2);
// each child's branch is the merge height minus the length already below that child
165 tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves());
166 tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves());
168 //set your length to leaves to your childs length plus branchlength
// only the row child is consulted here
169 tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
// both matrix rows now refer to the newly created node
173 indexes[row] = numLeaves+i;
174 indexes[column] = numLeaves+i;
176 //remove highest value that caused the merge.
177 sims[row][column] = -1000.0;
178 sims[column][row] = -1000.0;
180 //merge values in simsMatrix
181 for (int n = 0; n < sims.size(); n++) {
182 //row becomes merge of 2 groups
// arithmetic mean of the two merged rows, mirrored to keep the matrix symmetric
183 sims[row][n] = (sims[row][n] + sims[column][n]) / 2;
184 sims[n][row] = sims[row][n];
// the column entries are retired so they are never picked as the largest value again
186 sims[column][n] = -1000.0;
187 sims[n][column] = -1000.0;
191 //adjust tree to make sure root to tip length is .5
192 int root = findRoot();
193 tree[root].setBranchLength((0.5 - tree[root].getLengthToLeaves()));
196 catch(exception& e) {
197 m->errorOut(e, "Tree", "Tree");
201 /*****************************************************************/
203 /*****************************************************************/
// Folds the duplicate sequence names listed in nameMap into every leaf's pcount, pGroups,
// group list and groupNodeInfo.
// nameMap: leaf name -> comma-separated list of names (split with m->splitAtComma); taken by value.
// If a leaf name is missing from nameMap a message is printed and the program ends via exit(1).
204 void Tree::addNamesToCounts(map<string, string> nameMap) {
206 //ex. seq1 seq2,seq3,se4
212 //before this function seq1.pcount = pasture -> 1
213 //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
215 //before this function seq1.pgroups = pasture -> 1
216 //after seq1.pgroups = pasture -> 1 since that is the dominant group
219 //go through each leaf and update its pcounts and pgroups
223 for (int i = 0; i < numLeaves; i++) {
225 string name = tree[i].getName();
227 map<string, string>::iterator itNames = nameMap.find(name);
229 if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); }
231 vector<string> dupNames;
232 m->splitAtComma(nameMap[name], dupNames);
234 map<string, int>::iterator itCounts;
// tracks which groups have already had leaf i recorded in groupNodeInfo during this pass
236 set<string> groupsAddedForThisNode;
237 for (int j = 0; j < dupNames.size(); j++) {
239 string group = tmap->getGroup(dupNames[j]);
241 if (dupNames[j] != name) {//you already added yourself in the constructor
243 if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it
// bump the per-group sequence count for this leaf
246 itCounts = tree[i].pcount.find(group);
247 if (itCounts == tree[i].pcount.end()) { //new group, add it
248 tree[i].pcount[group] = 1;
250 tree[i].pcount[group]++;
// same bookkeeping for the parsimony groups
254 itCounts = tree[i].pGroups.find(group);
255 if (itCounts == tree[i].pGroups.end()) { //new group, add it
256 tree[i].pGroups[group] = 1;
258 tree[i].pGroups[group]++;
// NOTE(review): maxPars (and the iterator `it` used further down) are declared on lines not visible in this view
262 if(tree[i].pGroups[group] > maxPars){
263 maxPars = tree[i].pGroups[group];
265 }else { groupsAddedForThisNode.insert(group); } //add it so you don't add it to groupNodeInfo again
268 if (maxPars > 1) { //then we have some more dominant groups
269 //erase all the groups that are less than maxPars because you found a more dominant group.
270 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
271 if(it->second < maxPars){
// post-increment hands erase the old position while `it` already points at the next element
272 tree[i].pGroups.erase(it++);
275 //set one remaining groups to 1
276 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
277 tree[i].pGroups[it->first] = 1;
281 //update groups to reflect all the groups this node represents
// the leaf's group list becomes the key set of pcount
282 vector<string> nodeGroups;
283 map<string, int>::iterator itGroups;
284 for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
285 nodeGroups.push_back(itGroups->first);
287 tree[i].setGroup(nodeGroups);
293 //cout << "addNamesToCounts\t" << (B - A) / CLOCKS_PER_SEC << endl;
296 catch(exception& e) {
297 m->errorOut(e, "Tree", "addNamesToCounts");
301 /*****************************************************************/
// Asks the TreeMap for the node index stored under searchName.
// NOTE(review): the return statement is not visible in this view; presumably `index` is returned.
302 int Tree::getIndex(string searchName) {
304 //Treemap knows name, group and index to speed up search
305 // getIndex function will return the vector index or -1 if seq is not found.
306 int index = tmap->getIndex(searchName);
310 catch(exception& e) {
311 m->errorOut(e, "Tree", "getIndex");
315 /*****************************************************************/
// Stores `index` as the node index for searchName in the TreeMap (forwards to tmap->setIndex).
317 void Tree::setIndex(string searchName, int index) {
319 //set index in treemap
320 tmap->setIndex(searchName, index);
322 catch(exception& e) {
323 m->errorOut(e, "Tree", "setIndex");
327 /*****************************************************************/
// Computes pGroups and pcount for every internal node (indices numLeaves..numNodes-1) by
// merging the values of its two children. When nameMap is non-empty the leaf counts are first
// expanded with addNamesToCounts.
// Returns 1 as soon as m->control_pressed is seen; the normal-path return is not visible in this view.
// NOTE(review): nodes are visited in increasing index order, which presumably relies on
// children being stored at lower indices than their parent -- confirm.
328 int Tree::assembleTree(map<string, string> nameMap) {
333 //if user has given a names file we want to include that info in the pgroups and pcount info.
334 if(nameMap.size() != 0) { addNamesToCounts(nameMap); }
336 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
337 for (int i = numLeaves; i < numNodes; i++) {
338 if (m->control_pressed) { return 1; }
340 tree[i].pGroups = (mergeGroups(i));
341 tree[i].pcount = (mergeGcounts(i));
346 catch(exception& e) {
347 m->errorOut(e, "Tree", "assembleTree");
351 /*****************************************************************
352 int Tree::assembleTree(string n) {
355 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
356 for (int i = numLeaves; i < numNodes; i++) {
357 if (m->control_pressed) { return 1; }
359 tree[i].pGroups = (mergeGroups(i));
360 tree[i].pcount = (mergeGcounts(i));
363 //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
366 catch(exception& e) {
367 m->errorOut(e, "Tree", "assembleTree");
371 /*****************************************************************/
372 //assumes leaf node names are in groups and no names file - used by indicator command
// Rebuilds this tree as the part of Ctree that contains only the leaves named in Groups.
// Steps visible here: (1) make a heap-allocated working copy of Ctree and assemble it,
// (2) relabel this tree's leaves from Groups, (3) walk the copy's leaves and unlink the ones that
// are not wanted by re-parenting siblings, (4) find the copy's root and refill this tree's
// internal nodes with populateNewTree.
// NOTE(review): `copy` is created with new and no matching delete is visible in this view -- confirm it is freed.
373 void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
376 //copy Tree since we are going to destroy it
377 Tree* copy = new Tree(tmap);
378 copy->getCopy(Ctree);
379 map<string, string> empty;
380 copy->assembleTree(empty);
382 //we want to select some of the leaf nodes to create the output tree
383 //go through the input Tree starting at parents of leaves
384 for (int i = 0; i < numNodes; i++) {
386 //initialize leaf nodes
387 if (i <= (numLeaves-1)) {
// leaf i of this tree takes the i-th requested name
388 tree[i].setName(Groups[i]);
391 string group = tmap->getGroup(Groups[i]);
392 vector<string> tempGroups; tempGroups.push_back(group);
393 tree[i].setGroup(tempGroups);
394 groupNodeInfo[group].push_back(i);
396 //set pcount and pGroup for groupname to 1.
397 tree[i].pcount[group] = 1;
398 tree[i].pGroups[group] = 1;
400 //Treemap knows name, group and index to speed up search
// this overwrites the index tmap held for the name with its position in this (new) tree
401 tmap->setIndex(Groups[i], i);
403 //intialize non leaf nodes
404 }else if (i > (numLeaves-1)) {
406 vector<string> tempGroups;
407 tree[i].setGroup(tempGroups);
// leaves of the copy that have already been unlinked are skipped in the walk below
411 set<int> removedLeaves;
412 for (int i = 0; i < copy->getNumLeaves(); i++) {
414 if (removedLeaves.count(i) == 0) {
417 int parent = copy->tree[i].getParent();
// case A: this leaf is one of the requested names
421 if (m->inUsersGroups(copy->tree[i].getName(), Groups)) {
422 //find my siblings name
423 int parentRC = copy->tree[parent].getRChild();
424 int parentLC = copy->tree[parent].getLChild();
426 //if I am the right child, then my sib is the left child
427 int sibIndex = parentRC;
428 if (parentRC == i) { sibIndex = parentLC; }
430 string sibsName = copy->tree[sibIndex].getName();
432 //if yes, is my sibling
// an empty name is treated the same as a wanted sibling (presumably an internal node -- confirm)
433 if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) {
434 //we both are okay no trimming required
436 //i am, my sib is not, so remove sib by setting my parent to my grandparent
437 int grandparent = copy->tree[parent].getParent();
// NOTE(review): tree[grandparent] is indexed here before the grandparent != -1 check further
// down; confirm parent can never be the root on this path
438 int grandparentLC = copy->tree[grandparent].getLChild();
439 int grandparentRC = copy->tree[grandparent].getRChild();
441 //whichever of my granparents children was my parent now equals me
442 if (grandparentLC == parent) { grandparentLC = i; }
443 else { grandparentRC = i; }
445 copy->tree[i].setParent(grandparent);
// the removed parent's branch is added onto this leaf's branch
446 copy->tree[i].setBranchLength((copy->tree[i].getBranchLength()+copy->tree[parent].getBranchLength()));
447 if (grandparent != -1) {
448 copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
450 removedLeaves.insert(sibIndex);
// case B: this leaf is not requested (the else line itself is not visible in this view)
453 //find my siblings name
454 int parentRC = copy->tree[parent].getRChild();
455 int parentLC = copy->tree[parent].getLChild();
457 //if I am the right child, then my sib is the left child
458 int sibIndex = parentRC;
459 if (parentRC == i) { sibIndex = parentLC; }
461 string sibsName = copy->tree[sibIndex].getName();
463 //if no is my sibling
464 if ((m->inUsersGroups(sibsName, Groups)) || (sibsName == "")) {
465 //i am not, but my sib is
466 int grandparent = copy->tree[parent].getParent();
// NOTE(review): same index-before-check pattern as in case A
467 int grandparentLC = copy->tree[grandparent].getLChild();
468 int grandparentRC = copy->tree[grandparent].getRChild();
470 //whichever of my granparents children was my parent now equals my sib
471 if (grandparentLC == parent) { grandparentLC = sibIndex; }
472 else { grandparentRC = sibIndex; }
474 copy->tree[sibIndex].setParent(grandparent);
475 copy->tree[sibIndex].setBranchLength((copy->tree[sibIndex].getBranchLength()+copy->tree[parent].getBranchLength()));
476 if (grandparent != -1) {
477 copy->tree[grandparent].setChildren(grandparentLC, grandparentRC);
479 removedLeaves.insert(i);
481 //neither of us are, so we want to eliminate ourselves and our parent
482 //so set our parents sib to our great-grandparent
483 int parent = copy->tree[i].getParent();
484 int grandparent = copy->tree[parent].getParent();
486 if (grandparent != -1) {
487 int greatgrandparent = copy->tree[grandparent].getParent();
// NOTE(review): these two are only assigned when greatgrandparent != -1 but greatgrandparentLC
// is compared below regardless; confirm the comparison cannot read an unset value
488 int greatgrandparentLC, greatgrandparentRC;
489 if (greatgrandparent != -1) {
490 greatgrandparentLC = copy->tree[greatgrandparent].getLChild();
491 greatgrandparentRC = copy->tree[greatgrandparent].getRChild();
494 int grandparentLC = copy->tree[grandparent].getLChild();
495 int grandparentRC = copy->tree[grandparent].getRChild();
// NOTE(review): parentsSibIndex is declared on a line not visible in this view
497 parentsSibIndex = grandparentLC;
498 if (grandparentLC == parent) { parentsSibIndex = grandparentRC; }
500 //whichever of my greatgrandparents children was my grandparent
501 if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; }
502 else { greatgrandparentRC = parentsSibIndex; }
// the parent's sibling takes the grandparent's place and absorbs its branch length
504 copy->tree[parentsSibIndex].setParent(greatgrandparent);
505 copy->tree[parentsSibIndex].setBranchLength((copy->tree[parentsSibIndex].getBranchLength()+copy->tree[grandparent].getBranchLength()));
506 if (greatgrandparent != -1) {
507 copy->tree[greatgrandparent].setChildren(greatgrandparentLC, greatgrandparentRC);
510 copy->tree[parent].setParent(-1);
511 //cout << "issues with making subtree" << endl;
513 removedLeaves.insert(sibIndex);
514 removedLeaves.insert(i);
// the first node of the copy whose parent is -1 is taken as the root
// NOTE(review): `root` is declared on a line not visible in this view
522 for (int i = 0; i < copy->getNumNodes(); i++) {
524 if (copy->tree[i].getParent() == -1) { root = i; break; }
// internal nodes of this tree are filled starting at index numLeaves
527 int nextSpot = numLeaves;
528 populateNewTree(copy->tree, root, nextSpot);
532 catch(exception& e) {
533 m->errorOut(e, "Tree", "getSubTree");
537 /*****************************************************************/
538 //assumes nameMap contains unique names as key or is empty.
539 //assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs.
// Builds the subtree of `copy` restricted to seqsToInclude (via the two-argument getSubTree),
// optionally expands leaf counts from nameMap, then recomputes pGroups/pcount for the internal nodes.
// Returns 0 after printing an error and setting m->control_pressed when numLeaves differs from
// seqsToInclude.size(); returns 1 when m->control_pressed is seen during the merge loop.
// The normal-path return is not visible in this view.
540 int Tree::getSubTree(Tree* copy, vector<string> seqsToInclude, map<string, string> nameMap) {
543 if (numLeaves != seqsToInclude.size()) { m->mothurOut("[ERROR]: numLeaves does not equal numUniques, cannot create subtree.\n"); m->control_pressed = true; return 0; }
545 getSubTree(copy, seqsToInclude);
546 if (nameMap.size() != 0) { addNamesToCounts(nameMap); }
548 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
549 for (int i = numLeaves; i < numNodes; i++) {
550 if (m->control_pressed) { return 1; }
552 tree[i].pGroups = (mergeGroups(i));
553 tree[i].pcount = (mergeGcounts(i));
558 catch(exception& e) {
559 m->errorOut(e, "Tree", "getSubTree");
563 /*****************************************************************/
// Recursively rebuilds, inside this tree, the topology found below `node` in oldtree.
// A leaf of oldtree resolves to its index in this tree through tmap->getIndex(name), and that
// index is returned. An internal node is written to tree[index] after both subtrees are processed.
// `index` is passed by reference so the recursion shares one "next free internal slot" counter.
// NOTE(review): the lines that advance `index` and return from the internal-node branch are not
// visible in this view.
564 int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
567 if (oldtree[node].getLChild() != -1) {
// naming caveat: rc holds the result for the old LEFT child and lc the result for the old RIGHT child
568 int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index);
569 int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index);
// consequently the children are stored in swapped left/right order relative to oldtree
571 tree[index].setChildren(lc, rc);
572 tree[rc].setParent(index);
573 tree[lc].setParent(index);
// branch lengths follow the same pairing as above (rc <- old left child, lc <- old right child)
575 tree[index].setBranchLength(oldtree[node].getBranchLength());
576 tree[rc].setBranchLength(oldtree[oldtree[node].getLChild()].getBranchLength());
577 tree[lc].setBranchLength(oldtree[oldtree[node].getRChild()].getBranchLength());
580 }else { //you are a leaf
581 int indexInNewTree = tmap->getIndex(oldtree[node].getName());
582 return indexInNewTree;
585 catch(exception& e) {
586 m->errorOut(e, "Tree", "populateNewTree");
590 /*****************************************************************/
// Copies names, branch lengths, parent/child links and indices of every node from `copy`,
// clears pGroups, pcount and groupNodeInfo, and then rebuilds them for the leaves from nameMap,
// finishing with a merge pass over the internal nodes.
// A leaf name missing from nameMap prints a message and ends the program via exit(1).
// nameMap and namesToInclude are taken by value.
591 void Tree::getCopy(Tree* copy, map<string, string> nameMap, vector<string> namesToInclude) {
594 //for each node in the tree copy its info
595 for (int i = 0; i < numNodes; i++) {
597 tree[i].setName(copy->tree[i].getName());
// NOTE(review): `temp` is declared on a line not visible in this view
601 tree[i].setGroup(temp);
604 tree[i].setBranchLength(copy->tree[i].getBranchLength());
607 tree[i].setParent(copy->tree[i].getParent());
610 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
612 //copy index in node and tmap
613 tree[i].setIndex(copy->tree[i].getIndex());
// writes back to tmap the index that tmap already reports for this name
614 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
617 tree[i].pGroups.clear();
620 tree[i].pcount.clear();
623 groupNodeInfo.clear();
625 //now lets change prune the seqs not in namesToInclude by setting their group to "doNotIncludeMe"
626 for (int i = 0; i < numLeaves; i++) {
628 if (m->control_pressed) { break; }
630 string name = tree[i].getName();
632 map<string, string>::iterator itNames = nameMap.find(name);
634 if (itNames == nameMap.end()) { m->mothurOut(name + " is not in your name file, please correct."); m->mothurOutEndLine(); exit(1); }
636 vector<string> dupNames;
637 m->splitAtComma(nameMap[name], dupNames);
639 map<string, int>::iterator itCounts;
641 set<string> groupsAddedForThisNode;
642 for (int j = 0; j < dupNames.size(); j++) {
644 string group = tmap->getGroup(dupNames[j]);
645 bool includeMe = m->inUsersGroups(dupNames[j], namesToInclude);
// a name outside namesToInclude must already carry the group "doNotIncludeMe"; otherwise an
// error is printed and control_pressed is set
647 if (!includeMe && (group != "doNotIncludeMe")) { m->mothurOut("[ERROR] : creating subtree in copy.\n"); m->control_pressed = true; }
// NOTE(review): as written, the counting below runs only for names that are NOT in
// namesToInclude; confirm this condition is intended and not inverted
648 else if (!includeMe) {
649 if (groupsAddedForThisNode.count(group) == 0) { groupNodeInfo[group].push_back(i); groupsAddedForThisNode.insert(group); } //if you have not already added this node for this group, then add it
652 itCounts = tree[i].pcount.find(group);
653 if (itCounts == tree[i].pcount.end()) { //new group, add it
654 tree[i].pcount[group] = 1;
656 tree[i].pcount[group]++;
660 itCounts = tree[i].pGroups.find(group);
661 if (itCounts == tree[i].pGroups.end()) { //new group, add it
662 tree[i].pGroups[group] = 1;
664 tree[i].pGroups[group]++;
// NOTE(review): maxPars and the iterator `it` are declared on lines not visible in this view
668 if(tree[i].pGroups[group] > maxPars){
669 maxPars = tree[i].pGroups[group];
674 if (maxPars > 1) { //then we have some more dominant groups
675 //erase all the groups that are less than maxPars because you found a more dominant group.
676 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
677 if(it->second < maxPars){
// post-increment hands erase the old position while `it` already points at the next element
678 tree[i].pGroups.erase(it++);
681 //set one remaining groups to 1
682 for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
683 tree[i].pGroups[it->first] = 1;
687 //update groups to reflect all the groups this node represents
688 vector<string> nodeGroups;
689 map<string, int>::iterator itGroups;
690 for (itGroups = tree[i].pcount.begin(); itGroups != tree[i].pcount.end(); itGroups++) {
691 nodeGroups.push_back(itGroups->first);
693 tree[i].setGroup(nodeGroups);
699 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
700 for (int i = numLeaves; i < numNodes; i++) {
701 if (m->control_pressed) { break; }
703 tree[i].pGroups = (mergeGroups(i));
704 tree[i].pcount = (mergeGcounts(i));
707 catch(exception& e) {
708 m->errorOut(e, "Tree", "getCopy");
712 /*****************************************************************/
// Copies every node's name, group list, branch length, parent, children, index, pGroups and
// pcount from `copy` into this tree, and copies groupNodeInfo as well.
// The loop runs over this tree's numNodes; NOTE(review): it presumably assumes `copy` has at
// least as many nodes -- confirm at the call sites.
713 void Tree::getCopy(Tree* copy) {
716 //for each node in the tree copy its info
717 for (int i = 0; i < numNodes; i++) {
719 tree[i].setName(copy->tree[i].getName());
722 tree[i].setGroup(copy->tree[i].getGroup());
725 tree[i].setBranchLength(copy->tree[i].getBranchLength());
728 tree[i].setParent(copy->tree[i].getParent());
731 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
733 //copy index in node and tmap
734 tree[i].setIndex(copy->tree[i].getIndex());
// writes back to tmap the index that tmap already reports for this name
735 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
738 tree[i].pGroups = copy->tree[i].pGroups;
741 tree[i].pcount = copy->tree[i].pcount;
744 groupNodeInfo = copy->groupNodeInfo;
747 catch(exception& e) {
748 m->errorOut(e, "Tree", "getCopy");
752 /*****************************************************************/
753 //returns a map with a groupname and the number of times that group was seen in the children
754 //for instance if your children are white and black then it would return a map with 2 entries
755 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
756 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Computes the parsimony group map for internal node i from the pGroups of its two children.
// Starts from a copy of the left child's map, then for each group of the right child either
// increments the existing entry or adds it with value 1, tracking the largest value seen.
// Surviving entries are finally reset to 1.
// NOTE(review): the declarations of maxPars/it/it2, the conditions guarding the erase loop and
// the return statement are on lines not visible in this view.
758 map<string, int> Tree::mergeGroups(int i) {
760 int lc = tree[i].getLChild();
761 int rc = tree[i].getRChild();
763 //set parsimony groups to left child
764 map<string,int> parsimony = tree[lc].pGroups;
768 //look at right child groups and update maxPars if right child has something higher for that group.
769 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
770 it2 = parsimony.find(it->first);
// group present in both children
771 if (it2 != parsimony.end()) {
772 parsimony[it->first]++;
// group only present in the right child (the else line itself is not visible here)
774 parsimony[it->first] = 1;
777 if(parsimony[it->first] > maxPars){
778 maxPars = parsimony[it->first];
782 // this is true if right child had a greater parsimony for a certain group
784 //erase all the groups that are only 1 because you found something with 2.
785 for(it=parsimony.begin();it!=parsimony.end();){
// post-increment hands erase the old position while `it` already points at the next element
787 parsimony.erase(it++);
790 //set one remaining groups to 1
791 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
792 for(it=parsimony.begin();it!=parsimony.end();it++){
793 parsimony[it->first] = 1;
800 catch(exception& e) {
801 m->errorOut(e, "Tree", "mergeGroups");
805 /*****************************************************************/
806 //returns a map with a groupname and the number of times that group was seen in the children
807 //for instance if your children are white and black then it would return a map with 2 entries
808 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
809 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Variant of mergeGroups restricted to the groups listed in g.
// Side effect: entries for groups not in g are erased from the pGroups of BOTH children
// (tree[lc] and tree[rc] are modified in place) before the merge is computed.
// g is taken by value.
// NOTE(review): the declarations of maxPars/it/it2, the conditions guarding the final erase loop
// and the return statement are on lines not visible in this view.
811 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
814 int lc = tree[i].getLChild();
815 int rc = tree[i].getRChild();
817 //loop through nodes groups removing the ones the user doesn't want
818 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
819 if (m->inUsersGroups(it->first, g) != true) {
// post-increment hands erase the old position while `it` already points at the next element
820 tree[lc].pGroups.erase(it++);
824 //loop through nodes groups removing the ones the user doesn't want
825 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
826 if (m->inUsersGroups(it->first, g) != true) {
827 tree[rc].pGroups.erase(it++);
831 //set parsimony groups to left child
832 map<string,int> parsimony = tree[lc].pGroups;
836 //look at right child groups and update maxPars if right child has something higher for that group.
837 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
838 it2 = parsimony.find(it->first);
// group present in both children
839 if (it2 != parsimony.end()) {
840 parsimony[it->first]++;
// group only present in the right child (the else line itself is not visible here)
842 parsimony[it->first] = 1;
845 if(parsimony[it->first] > maxPars){
846 maxPars = parsimony[it->first];
850 // this is true if right child had a greater parsimony for a certain group
852 //erase all the groups that are only 1 because you found something with 2.
853 for(it=parsimony.begin();it!=parsimony.end();){
855 parsimony.erase(it++);
// every surviving group is reset to 1
859 for(it=parsimony.begin();it!=parsimony.end();it++){
860 parsimony[it->first] = 1;
866 catch(exception& e) {
867 m->errorOut(e, "Tree", "mergeUserGroups");
873 /**************************************************************************************************/
// Adds up the per-group counts (pcount) of the two children of node `position`:
// starts from a copy of the left child's map and adds each right-child entry onto it.
// NOTE(review): `pos` is not used on any visible line; the declaration of `it` and the return
// statement are on lines not visible in this view.
875 map<string,int> Tree::mergeGcounts(int position) {
877 map<string,int>::iterator pos;
879 int lc = tree[position].getLChild();
880 int rc = tree[position].getRChild();
882 map<string,int> sum = tree[lc].pcount;
884 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
// operator[] creates a zero entry for groups seen only in the right child before adding
885 sum[it->first] += it->second;
889 catch(exception& e) {
890 m->errorOut(e, "Tree", "mergeGcounts");
894 /**************************************************************************************************/
// Shuffles leaf labels: for each leaf i a random partner z is drawn and, when both leaves belong
// to groups listed in g, their pGroups, group lists, names and pcount are swapped.
// groupNodeInfo is reset to empty lists first and refilled from the leaves' group lists.
// g is taken by value.
// NOTE(review): z, treez and treei are declared on lines not visible in this view.
895 void Tree::randomLabels(vector<string> g) {
898 //initialize groupNodeInfo
899 for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
900 groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
903 for(int i = 0; i < numLeaves; i++){
905 //get random index to switch with
// scales rand() by (i+1)/(RAND_MAX+1.0), so the partner is drawn from positions 0..i
906 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
908 //you only want to randomize the nodes that are from a group the user wants analyzed, so
909 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
912 treez = m->inUsersGroups(tree[z].getGroup(), g);
913 treei = m->inUsersGroups(tree[i].getGroup(), g);
915 if ((treez == true) && (treei == true)) {
916 //switches node i and node z's info.
917 map<string,int> lib_hold = tree[z].pGroups;
918 tree[z].pGroups = (tree[i].pGroups);
919 tree[i].pGroups = (lib_hold);
921 vector<string> zgroup = tree[z].getGroup();
922 tree[z].setGroup(tree[i].getGroup());
923 tree[i].setGroup(zgroup);
925 string zname = tree[z].getName();
926 tree[z].setName(tree[i].getName());
927 tree[i].setName(zname);
929 map<string,int> gcount_hold = tree[z].pcount;
930 tree[z].pcount = (tree[i].pcount);
931 tree[i].pcount = (gcount_hold);
// record leaves i and z under each group they now carry
// NOTE(review): if these two lines sit inside the leaf loop, the same index can be appended to
// groupNodeInfo more than once across iterations -- confirm callers tolerate duplicates
934 for (int k = 0; k < (tree[i].getGroup()).size(); k++) { groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); }
935 for (int k = 0; k < (tree[z].getGroup()).size(); k++) { groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); }
938 catch(exception& e) {
939 m->errorOut(e, "Tree", "randomLabels");
943 /**************************************************************************************************/
// Shuffles branch lengths among all nodes: walking i from numNodes-1 down to 0, node i swaps
// its branch length with a randomly chosen node z drawn from positions 0..i.
944 void Tree::randomBlengths() {
946 for(int i=numNodes-1;i>=0;i--){
// scales rand() by (i+1)/(RAND_MAX+1.0)
947 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
// three-step swap of the two branch lengths
949 float bl_hold = tree[z].getBranchLength();
950 tree[z].setBranchLength(tree[i].getBranchLength());
951 tree[i].setBranchLength(bl_hold);
954 catch(exception& e) {
955 m->errorOut(e, "Tree", "randomBlengths");
959 /*************************************************************************************************/
// Overload taking the list of groups g (by value).
// NOTE(review): apart from the declaration of an empty name map, the body is not visible in this
// view, so what is done with g and `empty` cannot be documented from here.
960 void Tree::assembleRandomUnifracTree(vector<string> g) {
962 map<string, string> empty;
965 /*************************************************************************************************/
// Overload for exactly two groups: groupA and groupB are packed into a two-element vector.
// NOTE(review): the remaining statements of the body are not visible in this view.
966 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
967 vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
969 map<string, string> empty;
973 /*************************************************************************************************/
974 //for now it's just random topology, but it may become random labels as well later; that's why this is such a simple function now...
// NOTE(review): only the declaration of an empty name map is visible in this view; the calls
// that perform the randomization and assembly are on lines not shown here.
975 void Tree::assembleRandomTree() {
977 map<string, string> empty;
980 /**************************************************************************************************/
// Discards the current topology and builds a random one: all parents are reset to -1, all
// internal nodes lose their children, and then each internal node i (in increasing order)
// adopts two distinct, currently parentless nodes chosen at random.
// NOTE(review): the retry loops around the two random draws and the `escape` flag declaration
// are on lines not visible in this view.
982 void Tree::randomTopology() {
984 for(int i=0;i<numNodes;i++){
985 tree[i].setParent(-1);
987 for(int i=numLeaves;i<numNodes;i++){
988 tree[i].setChildren(-1, -1);
991 for(int i=numLeaves;i<numNodes;i++){
993 int rnd_index1, rnd_index2;
// NOTE(review): rand()/RAND_MAX can equal 1.0, in which case this evaluates to i itself, and
// tree[i] is parentless at this point; confirm node i cannot be picked as its own child
995 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
// a candidate is accepted only if it has no parent yet
996 if(tree[rnd_index1].getParent() == -1){escape = 1;}
1001 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
// the second child must differ from the first and also be parentless
1002 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
1007 tree[i].setChildren(rnd_index1,rnd_index2);
1008 tree[i].setParent(-1);
1009 tree[rnd_index1].setParent(i);
1010 tree[rnd_index2].setParent(i);
1013 catch(exception& e) {
1014 m->errorOut(e, "Tree", "randomTopology");
1018 /*****************************************************************/
// Writes the tree to `out` starting at the root, using printBranch in "branch" mode.
// NOTE(review): any terminator written after the branch output is on lines not visible in this view.
1019 void Tree::print(ostream& out) {
1021 int root = findRoot();
1022 printBranch(root, out, "branch");
1025 catch(exception& e) {
1026 m->errorOut(e, "Tree", "print");
1030 /*****************************************************************/
// Writes the tree to `out` starting at the root, using the printBranch overload that takes a
// name map. nameMap is taken by value.
1031 void Tree::print(ostream& out, map<string, string> nameMap) {
1033 int root = findRoot();
1034 printBranch(root, out, nameMap);
1037 catch(exception& e) {
1038 m->errorOut(e, "Tree", "print");
1042 /*****************************************************************/
// Writes the tree to `out` starting at the root, passing `mode` through to printBranch
// (the modes that overload compares against are "branch", "boot" and "both").
1043 void Tree::print(ostream& out, string mode) {
1045 int root = findRoot();
1046 printBranch(root, out, mode);
1049 catch(exception& e) {
1050 m->errorOut(e, "Tree", "print");
1054 /*****************************************************************/
1055 // This prints out the tree in Newick form.
// Opens an output file through m->openOutputFile and writes the tree from the root with
// printBranch in "branch" mode.
// NOTE(review): `filename` and `out` are set up on lines not visible in this view (presumably
// filename is taken from f -- confirm), as is whatever is written at the end of the tree.
1056 void Tree::createNewickFile(string f) {
1058 int root = findRoot();
1062 m->openOutputFile(filename, out);
1064 printBranch(root, out, "branch");
1066 // you are at the end of the tree
1070 catch(exception& e) {
1071 m->errorOut(e, "Tree", "createNewickFile");
1076 /*****************************************************************/
1077 //This function finds the index of the root node.
// Scans the nodes in index order and returns the index of the first one whose parent is -1.
// NOTE(review): the value returned when no such node exists is on a line not visible in this view.
1079 int Tree::findRoot() {
1081 for (int i = 0; i < numNodes; i++) {
1082 //you found the root
1083 if (tree[i].getParent() == -1) { return i; }
1084 //cout << "i = " << i << endl;
1085 //cout << "i's parent = " << tree[i].getParent() << endl;
1089 catch(exception& e) {
1090 m->errorOut(e, "Tree", "findRoot");
1094 /*****************************************************************/
// Recursively writes the subtree rooted at `node` to `out`, expanding each leaf with the
// duplicate names listed for it in `names` (leaf name -> comma-separated names).
// A branch length of -1 is treated as "no branch length" and is not printed.
// `names` is taken by value, so it is copied on every recursive call.
// NOTE(review): the lines that write the parentheses/commas around the two child subtrees and
// several else lines are not visible in this view.
1095 void Tree::printBranch(int node, ostream& out, map<string, string> names) {
1098 // you are not a leaf
1099 if (tree[node].getLChild() != -1) {
1101 printBranch(tree[node].getLChild(), out, names);
1103 printBranch(tree[node].getRChild(), out, names);
1106 //if there is a branch length then print it
1107 if (tree[node].getBranchLength() != -1) {
1108 out << ":" << tree[node].getBranchLength();
1111 }else { //you are a leaf
1112 map<string, string>::iterator itNames = names.find(tree[node].getName());
// the leaf's text is assembled here and written in one go at the end
1114 string outputString = "";
1115 if (itNames != names.end()) {
1117 vector<string> dupNames;
1118 m->splitAtComma((itNames->second), dupNames);
// a single name: print the leaf as-is
1120 if (dupNames.size() == 1) {
1121 outputString += tree[node].getName();
1122 if (tree[node].getBranchLength() != -1) {
1123 outputString += ":" + toString(tree[node].getBranchLength());
// several names: print them as a parenthesised group, each with branch length 0.0, and put the
// leaf's own branch length on the group
1126 outputString += "(";
// NOTE(review): size()-1 is unsigned arithmetic; presumably dupNames is never empty here -- confirm
1128 for (int u = 0; u < dupNames.size()-1; u++) {
1129 outputString += dupNames[u];
1131 if (tree[node].getBranchLength() != -1) {
1132 outputString += ":" + toString(0.0);
1134 outputString += ",";
// the last name is written without a trailing comma
1137 outputString += dupNames[dupNames.size()-1];
1138 if (tree[node].getBranchLength() != -1) {
1139 outputString += ":" + toString(0.0);
1142 outputString += ")";
1143 if (tree[node].getBranchLength() != -1) {
1144 outputString += ":" + toString(tree[node].getBranchLength());
// leaf not found in `names`: print it plainly and report the problem
1148 outputString = tree[node].getName();
1149 //if there is a branch length then print it
1150 if (tree[node].getBranchLength() != -1) {
1151 outputString += ":" + toString(tree[node].getBranchLength());
1154 m->mothurOut("[ERROR]: " + tree[node].getName() + " is not in your namefile, please correct."); m->mothurOutEndLine();
1157 out << outputString;
1161 catch(exception& e) {
1162 m->errorOut(e, "Tree", "printBranch");
1166 /*****************************************************************/
// Recursively writes the subtree rooted at `node` (an index into tree) to `out`.
//   node - index into tree of the subtree root to print.
//   out  - stream that receives the text.
//   mode - selects what is written after each node; the values tested are
//          "branch" : ":" + branch length, when the length is not -1
//          "boot"   : the node label, when the label is not -1
//          "both"   : the label (if not -1) followed by ":" + branch length (if not -1)
//          No other value is tested in the visible lines.
// Any std::exception caught is reported through m->errorOut(e, "Tree", "printBranch").
// NOTE(review): this excerpt is missing some physical lines (bracket/comma output,
// closing braces); the comments describe only the statements that are visible.
1167 void Tree::printBranch(int node, ostream& out, string mode) {
1170 // you are not a leaf
1171 if (tree[node].getLChild() != -1) {
// left subtree first, then right subtree, both with the same mode
1173 printBranch(tree[node].getLChild(), out, mode);
1175 printBranch(tree[node].getRChild(), out, mode);
1177 if (mode == "branch") {
1178 //if there is a branch length then print it
1179 if (tree[node].getBranchLength() != -1) {
1180 out << ":" << tree[node].getBranchLength();
1182 }else if (mode == "boot") {
1183 //if there is a label then print it
1184 if (tree[node].getLabel() != -1) {
1185 out << tree[node].getLabel();
1187 }else if (mode == "both") {
// label first, then the branch length
1188 if (tree[node].getLabel() != -1) {
1189 out << tree[node].getLabel();
1191 //if there is a branch length then print it
1192 if (tree[node].getBranchLength() != -1) {
1193 out << ":" << tree[node].getBranchLength();
1196 }else { //you are a leaf
// group of this leaf as recorded in tmap.
// NOTE(review): the statements that write leafGroup are not visible in this excerpt.
1197 string leafGroup = tmap->getGroup(tree[node].getName());
1199 if (mode == "branch") {
1201 //if there is a branch length then print it
1202 if (tree[node].getBranchLength() != -1) {
1203 out << ":" << tree[node].getBranchLength();
1205 }else if (mode == "boot") {
1207 //if there is a label then print it
1208 if (tree[node].getLabel() != -1) {
1209 out << tree[node].getLabel();
1211 }else if (mode == "both") {
// in this mode the leaf is written by its own name
1212 out << tree[node].getName();
1213 if (tree[node].getLabel() != -1) {
1214 out << tree[node].getLabel();
1216 //if there is a branch length then print it
1217 if (tree[node].getBranchLength() != -1) {
1218 out << ":" << tree[node].getBranchLength();
1224 catch(exception& e) {
1225 m->errorOut(e, "Tree", "printBranch");
1229 /*****************************************************************/
// Variant of printBranch that reads node data from the caller-supplied vector
// `theseNodes` instead of the member tree.
//   node       - index into theseNodes of the subtree root to print.
//   out        - stream that receives the text.
//   mode       - "branch", "boot" or "both"; same meaning as in the
//                (int, ostream&, string) overload: branch length, label, or label
//                followed by branch length, each written only when not -1.
//   theseNodes - node storage that is inspected for this node.
// Any std::exception caught is reported through m->errorOut(e, "Tree", "printBranch").
// NOTE(review): this excerpt is missing some physical lines (bracket/comma output,
// closing braces); the comments describe only the statements that are visible.
1230 void Tree::printBranch(int node, ostream& out, string mode, vector<Node>& theseNodes) {
1233 // you are not a leaf
1234 if (theseNodes[node].getLChild() != -1) {
// NOTE(review): these two calls pass three arguments, so they select the
// (int, ostream&, string) overload, which reads the member tree rather than
// theseNodes. Only the top-level node is therefore taken from theseNodes. This
// looks unintended - confirm whether theseNodes should be forwarded here.
1236 printBranch(theseNodes[node].getLChild(), out, mode);
1238 printBranch(theseNodes[node].getRChild(), out, mode);
1240 if (mode == "branch") {
1241 //if there is a branch length then print it
1242 if (theseNodes[node].getBranchLength() != -1) {
1243 out << ":" << theseNodes[node].getBranchLength();
1245 }else if (mode == "boot") {
1246 //if there is a label then print it
1247 if (theseNodes[node].getLabel() != -1) {
1248 out << theseNodes[node].getLabel();
1250 }else if (mode == "both") {
// label first, then the branch length
1251 if (theseNodes[node].getLabel() != -1) {
1252 out << theseNodes[node].getLabel();
1254 //if there is a branch length then print it
1255 if (theseNodes[node].getBranchLength() != -1) {
1256 out << ":" << theseNodes[node].getBranchLength();
1259 }else { //you are a leaf
// group of this leaf as recorded in tmap.
// NOTE(review): the statements that write leafGroup are not visible in this excerpt.
1260 string leafGroup = tmap->getGroup(theseNodes[node].getName());
1262 if (mode == "branch") {
1264 //if there is a branch length then print it
1265 if (theseNodes[node].getBranchLength() != -1) {
1266 out << ":" << theseNodes[node].getBranchLength();
1268 }else if (mode == "boot") {
1270 //if there is a label then print it
1271 if (theseNodes[node].getLabel() != -1) {
1272 out << theseNodes[node].getLabel();
1274 }else if (mode == "both") {
// in this mode the leaf is written by its own name
1275 out << theseNodes[node].getName();
1276 if (theseNodes[node].getLabel() != -1) {
1277 out << theseNodes[node].getLabel();
1279 //if there is a branch length then print it
1280 if (theseNodes[node].getBranchLength() != -1) {
1281 out << ":" << theseNodes[node].getBranchLength();
1287 catch(exception& e) {
1288 m->errorOut(e, "Tree", "printBranch");
1292 /*****************************************************************/
// Calls printNode() on every element of tree, from index 0 up to numNodes-1.
1294 void Tree::printTree() {
1296 for(int i=0;i<numNodes;i++){
1298 tree[i].printNode();
1303 /*****************************************************************/
1304 //this code is a mess and should be rethought...-slw
// Opens the file named by m->getTreeFile() and appends entries to m->Treenames.
// The first character peeked from the stream selects one of two paths:
//   - not '#': loop until a '(' is peeked while comment != 1, then let
//     readTreeString() collect the names;
//   - '#' (nexus): read whitespace-separated tokens into holder until "translate" or
//     "Translate" is seen. A "tree" token met outside a comment is handed to
//     readTreeString(); a translate block, once reached, is read token by token.
// Any std::exception caught is reported through m->errorOut(e, "Tree", "parseTreeFile").
// NOTE(review): the declarations of c, comment, done and holder, and several other
// statements, are not visible in this excerpt; the comments below describe only the
// visible lines.
1305 void Tree::parseTreeFile() {
1307 //only takes names from the first tree and assumes that all trees use the same names.
1309 string filename = m->getTreeFile();
1310 ifstream filehandle;
1311 m->openInputFile(filename, filehandle);
1316 //if you are not a nexus file
1317 if((c = filehandle.peek()) != '#') {
1318 while((c = filehandle.peek()) != ';') {
1319 while ((c = filehandle.peek()) != ';') {
1320 // get past comments
// an opening bracket outside a comment marks the start of the tree string
1327 if((c == '(') && (comment != 1)){ break; }
1331 done = readTreeString(filehandle);
// a result of 0 from readTreeString ends the scan
1332 if (done == 0) { break; }
1334 //if you are a nexus file
1335 }else if((c = filehandle.peek()) == '#') {
1338 // get past comments
1339 while(holder != "translate" && holder != "Translate"){
// the tokens "[" and "[!" are the ones recognised here as comment openers
1340 if(holder == "[" || holder == "[!"){
1346 filehandle >> holder;
1348 //if there is no translate then you must read tree string otherwise use translate to get names
1349 if((holder == "tree") && (comment != 1)){
1350 //pass over the "tree rep.6878900 = "
// consumes characters until '(' has been read or the next character is EOF
1351 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
1353 if(c == EOF) { break; }
1354 filehandle.putback(c); //put back first ( of tree.
1355 done = readTreeString(filehandle);
1360 if (done == 0) { break; }
1363 //use nexus translation rather than parsing tree to save time
1364 if((holder == "translate") || (holder == "Translate")) {
1366 string number, name, h;
1367 h = ""; // so it enters the loop the first time
// loop ends once h or number equals ";"
1368 while((h != ";") && (number != ";")) {
1369 filehandle >> number;
1372 //c = , until done with translation then c = ;
// h receives the last character of name, which is then removed from name.
// NOTE(review): if name is empty here, length()-1 wraps around and substr throws
// std::out_of_range, and erase(end()-1) is invalid - confirm name is always non-empty
// (the statement that fills name is not visible in this excerpt).
1373 h = name.substr(name.length()-1, name.length());
1374 name.erase(name.end()-1); //erase the comma
// the value stored is number (the first token read), not name
1375 m->Treenames.push_back(number);
1377 if(number == ";") { m->Treenames.pop_back(); } //in case ';' from translation is on next line instead of next to last name
1382 //for (int i = 0; i < globaldata->Treenames.size(); i++) {
1383 //cout << globaldata->Treenames[i] << endl; }
1384 //cout << globaldata->Treenames.size() << endl;
1386 catch(exception& e) {
1387 m->errorOut(e, "Tree", "parseTreeFile");
1391 /*******************************************************/
1393 /*******************************************************/
// Scans a tree string character by character from filehandle and appends each name
// it finds to m->Treenames.
//   filehandle - input stream to read from; characters are consumed as they are scanned.
// Returns 0 when ';' or -1 (the value tested here for end of input) is reached; every
// return statement visible in this excerpt returns 0.
// A name starts at a character that is none of '(' ')' ',' ':' '\n' '\t' ' ' and runs
// up to the next such delimiter. Text after ':' is skipped up to the next delimiter.
// Any std::exception caught is reported through m->errorOut(e, "Tree", "readTreeString").
// NOTE(review): the declarations of c and name, the statement that accumulates name
// and some guards/closing braces are not visible in this excerpt; the comments
// describe only the visible lines.
1394 int Tree::readTreeString(ifstream& filehandle) {
1399 while((c = filehandle.peek()) != ';') {
1401 //cout << " at beginning of while " << k << endl;
1403 //to pass over labels in trees
// consume characters until ',' ':' ';' or -1, then push that terminator back
1405 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
1406 filehandle.putback(c);
1408 if(c == ';') { return 0; }
1409 if(c == -1) { return 0; }
// anything that is not a delimiter or whitespace starts a name
1411 if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
1413 c = filehandle.get();
1415 //cout << k << endl;
1416 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {
1418 c = filehandle.get();
1420 //cout << " in name while " << k << endl;
1423 //cout << "name = " << name << endl;
1424 m->Treenames.push_back(name);
// the delimiter that ended the name is returned to the stream
1425 filehandle.putback(c);
1427 //cout << " after putback" << k << endl;
1430 if(c == ':') { //read until you reach the end of the branch length
1431 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
1432 c = filehandle.get();
1434 //cout << " in branch while " << k << endl;
// the delimiter that ended the branch length is returned to the stream
1436 filehandle.putback(c);
1439 c = filehandle.get();
1441 //cout << " here after get " << k << endl;
1442 if(c == ';') { return 0; }
// a closing bracket is pushed back so that the next loop iteration peeks it again
1443 if(c == ')') { filehandle.putback(c); }
1445 //cout << k << endl;
1450 catch(exception& e) {
1451 m->errorOut(e, "Tree", "readTreeString");
1456 /*******************************************************/
1458 /*******************************************************/