5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 /*****************************************************************/
// Tree set-up: sizes the node vector from the global treemap and fills in the leaf nodes.
// NOTE(review): the signature line is not visible in this excerpt; the error messages below
// name this function "Tree", so this is presumably the Tree constructor - confirm.
17 globaldata = GlobalData::getInstance();
18 numLeaves = globaldata->gTreemap->getNumSeqs();
// total node count; indices >= numLeaves are treated as non-leaf nodes below.
// NOTE(review): if getNumSeqs() returns 0 this evaluates to -1 (assuming a signed type) before resize() - confirm callers never do that.
19 numNodes = 2*numLeaves - 1;
21 tree.resize(numNodes);
23 //initialize tree with correct number of nodes, name and group info.
24 for (int i = 0; i < numNodes; i++) {
25 //initialize leaf nodes (indices 0 .. numLeaves-1)
26 if (i <= (numLeaves-1)) {
// leaf i takes the name of the i-th sequence in the treemap and that sequence's group
27 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29 //the node knows its index
31 //set the pcount and pGroups entries for this leaf's group name to 1.
32 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
33 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
34 //Treemap knows name, group and index to speed up search
35 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
37 //initialize non-leaf nodes
// NOTE(review): this condition is the exact negation of the if above, so a plain else would be equivalent.
38 }else if (i > (numLeaves-1)) {
41 //the node knows its index
47 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
51 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
56 /*****************************************************************/
// Looks up searchName in the global treemap and obtains its index.
// searchName: sequence name to look for.
58 int Tree::getIndex(string searchName) {
60 //Treemap knows name, group and index to speed up search
61 // the treemap's getIndex function will return the vector index or -1 if the sequence is not found.
62 int index = globaldata->gTreemap->getIndex(searchName);
// error reporting: both messages go to stdout
67 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
71 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
75 /*****************************************************************/
// Records index for searchName in the global treemap (thin forwarding wrapper).
// searchName: sequence name; index: value stored for that name.
77 void Tree::setIndex(string searchName, int index) {
79 //set index in treemap
80 globaldata->gTreemap->setIndex(searchName, index);
// error reporting: both messages go to stdout
83 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
87 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
91 /*****************************************************************/
// Fills pGroups and pcount for every non-leaf node (indices numLeaves .. numNodes-1)
// by merging the corresponding maps of that node's two children.
92 void Tree::assembleTree() {
94 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
// NOTE(review): nodes are visited in increasing index order, so this presumably relies on
// children having smaller indices than their parents - confirm.
95 for (int i = numLeaves; i < numNodes; i++) {
96 tree[i].pGroups = (mergeGroups(i));
97 tree[i].pcount = (mergeGcounts(i));
100 catch(exception& e) {
101 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
109 /*****************************************************************/
// Copies per-node data FROM the tree pointed to by copy INTO this tree.
// Despite the parameter name, copy is the source; this object is the destination.
// Reads copy->tree[0 .. numNodes-1], using this tree's numNodes as the bound.
// NOTE(review): copy is dereferenced without a null check, and nothing visible here verifies
// that copy->tree holds at least numNodes nodes - confirm callers guarantee both.
110 void Tree::getCopy(Tree* copy) {
113 //for each node in the tree copy its info
114 for (int i = 0; i < numNodes; i++) {
116 tree[i].setName(copy->tree[i].getName());
119 tree[i].setGroup(copy->tree[i].getGroup());
122 tree[i].setBranchLength(copy->tree[i].getBranchLength());
125 tree[i].setParent(copy->tree[i].getParent());
128 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
130 //copy index in node and tmap
131 tree[i].setIndex(copy->tree[i].getIndex());
// NOTE(review): this stores back into the treemap the same value it just read from it
// (both calls go through globaldata->gTreemap), so it looks like a no-op - confirm intent.
132 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
// parsimony group map and per-group counts are copied by value
135 tree[i].pGroups = copy->tree[i].pGroups;
138 tree[i].pcount = copy->tree[i].pcount;
141 catch(exception& e) {
142 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
146 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
150 /*****************************************************************/
// Builds the parsimony group map for node i from the pGroups maps of its left and right child.
151 //returns a map with a groupname and the number of times that group was seen in the children
152 //for instance if your children are white and black then it would return a map with 2 entries
153 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
154 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// i: index of the node whose children are merged.
156 map<string, int> Tree::mergeGroups(int i) {
158 int lc = tree[i].getLChild();
159 int rc = tree[i].getRChild();
161 //start from a copy of the left child's parsimony groups
162 map<string,int> parsimony = tree[lc].pGroups;
// NOTE(review): maxPars, it and it2 are used below but are not declared on any line visible here.
166 //look at right child groups and update maxPars if right child has something higher for that group.
167 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
168 it2 = parsimony.find(it->first);
// group present in both children: bump its count
169 if (it2 != parsimony.end()) {
170 parsimony[it->first]++;
// group only in the right child: add it with a count of 1
172 parsimony[it->first] = 1;
// remember the highest count seen for any group
175 if(parsimony[it->first] > maxPars){
176 maxPars = parsimony[it->first];
180 // this is true if right child had a greater parsimony for a certain group
182 //erase all the groups that are only 1 because you found something with 2.
// NOTE(review): erasing the element that it currently refers to invalidates it, and the loop
// then executes it++ on the invalidated iterator (undefined behavior for std::map).
// A safe form advances the iterator before erasing, or uses the iterator returned by erase.
183 for(it=parsimony.begin();it!=parsimony.end();it++){
185 parsimony.erase(it->first);
189 //reset the remaining groups to 1
190 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
191 for(it=parsimony.begin();it!=parsimony.end();it++){
192 parsimony[it->first] = 1;
199 catch(exception& e) {
200 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
204 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
208 /*****************************************************************/
// Same merge as mergeGroups, but first removes from both children's pGroups every group
// that inUsersGroups() does not find in globaldata->Groups.
209 //returns a map with a groupname and the number of times that group was seen in the children
210 //for instance if your children are white and black then it would return a map with 2 entries
211 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
212 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// i: index of the node whose children are merged.
// NOTE(review): the filtering below erases entries from tree[lc].pGroups and tree[rc].pGroups
// themselves, so calling this function modifies the child nodes - confirm that is intended.
214 map<string, int> Tree::mergeUserGroups(int i) {
217 int lc = tree[i].getLChild();
218 int rc = tree[i].getRChild();
220 //loop through the left child's groups removing the ones the user doesn't want
// NOTE(review): erase(it->first) removes the element it refers to, then the loop does it++
// on the invalidated iterator (undefined behavior for std::map). Same pattern in the next loop.
221 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
222 if (inUsersGroups(it->first, globaldata->Groups) != true) { tree[lc].pGroups.erase(it->first); }
225 //loop through the right child's groups removing the ones the user doesn't want
226 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
227 if (inUsersGroups(it->first, globaldata->Groups) != true) { tree[rc].pGroups.erase(it->first); }
230 //start from a copy of the (filtered) left child's parsimony groups
231 map<string,int> parsimony = tree[lc].pGroups;
// NOTE(review): maxPars, it and it2 are used below but are not declared on any line visible here.
235 //look at right child groups and update maxPars if right child has something higher for that group.
236 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
237 it2 = parsimony.find(it->first);
// group present in both children: bump its count
238 if (it2 != parsimony.end()) {
239 parsimony[it->first]++;
// group only in the right child: add it with a count of 1
241 parsimony[it->first] = 1;
// remember the highest count seen for any group
244 if(parsimony[it->first] > maxPars){
245 maxPars = parsimony[it->first];
249 // this is true if right child had a greater parsimony for a certain group
251 //erase all the groups that are only 1 because you found something with 2.
// NOTE(review): erase-while-iterating again; it is invalidated by the erase and then incremented.
252 for(it=parsimony.begin();it!=parsimony.end();it++){
254 parsimony.erase(it->first);
258 //reset the remaining groups to 1
259 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
260 for(it=parsimony.begin();it!=parsimony.end();it++){
261 parsimony[it->first] = 1;
268 catch(exception& e) {
// NOTE(review): both messages below say "mergeGroups" although this function is mergeUserGroups,
// which makes error reports from here indistinguishable from mergeGroups failures.
269 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
273 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279 /**************************************************************************************************/
// Adds up the per-group counts (pcount) of the two children of the node at index position.
// position: index of the node whose children's pcount maps are summed.
281 map<string,int> Tree::mergeGcounts(int position) {
// NOTE(review): pos is not used on any line visible in this excerpt.
283 map<string,int>::iterator pos;
285 int lc = tree[position].getLChild();
286 int rc = tree[position].getRChild();
// start from a copy of the left child's counts
288 map<string,int> sum = tree[lc].pcount;
// add the right child's counts; operator[] creates a zero entry for groups the left child lacks
290 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
291 sum[it->first] += it->second;
295 catch(exception& e) {
296 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
300 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
304 /**************************************************************************************************/
// Randomly exchanges the labels (pGroups, group, name, pcount) of leaf nodes, restricted to
// leaves whose group is accepted by inUsersGroups(..., globaldata->Groups).
306 void Tree::randomLabels() {
309 //set up the groups the user wants to include
// NOTE(review): z, treez and treei are used below but not declared on any line visible here.
312 for(int i = 0; i < numLeaves; i++){
314 //get random index to switch with
// scales rand() by (i+1)/(RAND_MAX+1); the result is intended to be an index in 0 .. i
315 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
317 //you only want to randomize the nodes that are from a group the user wants analyzed, so
318 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
321 treez = inUsersGroups(tree[z].getGroup(), globaldata->Groups);
322 treei = inUsersGroups(tree[i].getGroup(), globaldata->Groups);
324 if ((treez == true) && (treei == true)) {
325 //switches node i and node z's info.
// swap the parsimony group maps
326 map<string,int> lib_hold = tree[z].pGroups;
327 tree[z].pGroups = (tree[i].pGroups);
328 tree[i].pGroups = (lib_hold);
// swap the group names
330 string zgroup = tree[z].getGroup();
331 tree[z].setGroup(tree[i].getGroup());
332 tree[i].setGroup(zgroup);
// swap the sequence names
334 string zname = tree[z].getName();
335 tree[z].setName(tree[i].getName());
336 tree[i].setName(zname);
// swap the per-group counts
338 map<string,int> gcount_hold = tree[z].pcount;
339 tree[z].pcount = (tree[i].pcount);
340 tree[i].pcount = (gcount_hold);
344 catch(exception& e) {
345 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
353 /**************************************************************************************************/
// Pairwise variant of randomLabels(): randomly exchanges the labels (pGroups, group, name, pcount)
// of leaf nodes, but only between leaves whose group equals groupA or groupB.
// groupA, groupB: the two group names whose leaves may be exchanged.
355 void Tree::randomLabels(string groupA, string groupB) {
// NOTE(review): z is used below but not declared on any line visible here.
357 for(int i = 0; i < numLeaves; i++) {
359 //get random index to switch with
// scales rand() by (i+1)/(RAND_MAX+1); the result is intended to be an index in 0 .. i
360 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
362 //you only want to randomize the nodes that are from a group the user wants analyzed, so
363 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
// both leaf z and leaf i must belong to groupA or groupB
364 if (((tree[z].getGroup() == groupA) || (tree[z].getGroup() == groupB)) && ((tree[i].getGroup() == groupA) || (tree[i].getGroup() == groupB))) {
365 //switches node i and node z's info.
// swap the parsimony group maps
366 map<string,int> lib_hold = tree[z].pGroups;
367 tree[z].pGroups = (tree[i].pGroups);
368 tree[i].pGroups = (lib_hold);
// swap the group names
370 string zgroup = tree[z].getGroup();
371 tree[z].setGroup(tree[i].getGroup());
372 tree[i].setGroup(zgroup);
// swap the sequence names
374 string zname = tree[z].getName();
375 tree[z].setName(tree[i].getName());
376 tree[i].setName(zname);
// swap the per-group counts
378 map<string,int> gcount_hold = tree[z].pcount;
379 tree[z].pcount = (tree[i].pcount);
380 tree[i].pcount = (gcount_hold);
384 catch(exception& e) {
385 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
389 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
393 /**************************************************************************************************/
// Randomly exchanges branch lengths between nodes; names, groups and topology are not touched
// by the lines visible here.
394 void Tree::randomBlengths() {
// walk from the last node down to node 0, swapping node i's branch length with that of node z
396 for(int i=numNodes-1;i>=0;i--){
// scales rand() by (i+1)/(RAND_MAX+1); the result is intended to be an index in 0 .. i
397 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
399 float bl_hold = tree[z].getBranchLength();
400 tree[z].setBranchLength(tree[i].getBranchLength());
401 tree[i].setBranchLength(bl_hold);
404 catch(exception& e) {
405 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
409 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
413 /*************************************************************************************************/
// No-argument overload of assembleRandomUnifracTree.
// NOTE(review): the body is not visible in this excerpt, so its steps are not documented here.
414 void Tree::assembleRandomUnifracTree() {
418 /*************************************************************************************************/
// Pairwise overload: shuffles leaf labels between groupA and groupB via randomLabels(groupA, groupB).
// groupA, groupB: the two group names passed through to randomLabels.
// NOTE(review): any steps after the relabelling are not visible in this excerpt.
419 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
420 randomLabels(groupA, groupB);
424 /*************************************************************************************************/
425 //for now it's just random topology, but it may become random labels as well later; that's why this is such a simple function now...
// NOTE(review): the body is not visible in this excerpt.
426 void Tree::assembleRandomTree() {
430 /**************************************************************************************************/
// Discards the current parent/child links and rebuilds them at random: each non-leaf node i
// is given two distinct, currently parentless nodes as children.
432 void Tree::randomTopology() {
// detach every node from its parent
434 for(int i=0;i<numNodes;i++){
435 tree[i].setParent(-1);
// clear the children of every non-leaf node
437 for(int i=numLeaves;i<numNodes;i++){
438 tree[i].setChildren(-1, -1);
// attach two children to each non-leaf node in increasing index order
441 for(int i=numLeaves;i<numNodes;i++){
// NOTE(review): escape is used below but not declared on any line visible here.
443 int rnd_index1, rnd_index2;
// first child: a random candidate that has no parent yet.
// NOTE(review): rand() may return RAND_MAX, in which case the ratio is exactly 1.0 and the
// candidate index equals i itself; tree[i] has parent -1 at this point, so node i could be
// accepted as its own child. Dividing by (RAND_MAX + 1.0) would keep the index below i.
445 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
446 if(tree[rnd_index1].getParent() == -1){escape = 1;}
// second child: a different random candidate that has no parent yet (same range caveat as above)
451 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
452 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
// link node i to the two chosen children; i itself stays parentless until picked later
457 tree[i].setChildren(rnd_index1,rnd_index2);
458 tree[i].setParent(-1);
459 tree[rnd_index1].setParent(i);
460 tree[rnd_index2].setParent(i);
463 catch(exception& e) {
464 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
468 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
473 /*****************************************************************/
474 // This prints out the tree in Newick form.
// f: presumably the output file name - the visible lines open filename rather than f, so the
// assignment between the two is on a line not shown here; confirm.
475 void Tree::createNewickFile(string f) {
// the root is the starting point for the output
477 int root = findRoot();
478 //filename = getRootName(globaldata->getTreeFile()) + "newick";
// opens filename for writing through the stream out
480 openOutputFile(filename, out);
484 // you are at the end of the tree
488 catch(exception& e) {
489 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
493 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
498 /*****************************************************************/
499 //This function finds the index of the root node.
// The root is taken to be the first node, in index order, whose parent is -1.
// NOTE(review): what is returned when no node has parent -1 is not visible in this excerpt.
501 int Tree::findRoot() {
503 for (int i = 0; i < numNodes; i++) {
505 if (tree[i].getParent() == -1) { return i; }
509 catch(exception& e) {
510 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
514 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
519 /*****************************************************************/
// Recursively writes the subtree below node to the stream out.
// node: index of the node to print.
520 void Tree::printBranch(int node) {
523 // you are not a leaf: a left child of -1 is what marks a leaf
524 if (tree[node].getLChild() != -1) {
// recurse into the left subtree, then the right subtree
526 printBranch(tree[node].getLChild());
528 printBranch(tree[node].getRChild());
530 }else { //you are a leaf: write "name:branchLength"
531 out << tree[node].getName() << ":" << tree[node].getBranchLength();
535 catch(exception& e) {
536 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
540 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
545 /*****************************************************************/
// Validates globaldata->Groups against the treemap, dropping invalid names; when no valid
// group is left, or none was given, Groups is filled with every group in the treemap.
547 void Tree::setGroups() {
549 //if the user has entered specific groups to analyze then validate them; otherwise use all groups (further below)
550 if (globaldata->Groups.size() != 0) {
551 //check that groups are valid
// NOTE(review): after erase() the following element moves into slot i, but the loop still
// increments i, so the element right after an erased one is never checked - unless i is
// adjusted on a line not shown here. Two consecutive invalid groups would leave the second in place.
552 for (int i = 0; i < globaldata->Groups.size(); i++) {
553 if (globaldata->gTreemap->isValidGroup(globaldata->Groups[i]) != true) {
554 cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
555 // erase the invalid group from globaldata->Groups
556 globaldata->Groups.erase (globaldata->Groups.begin()+i);
560 //if the user only entered invalid groups
561 if (globaldata->Groups.size() == 0) {
562 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl;
// fall back to every group known to the treemap
563 for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
564 globaldata->Groups.push_back(globaldata->gTreemap->namesOfGroups[i]);
// second copy of the fill loop; presumably the branch taken when the user gave no groups at all
569 for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
570 globaldata->Groups.push_back(globaldata->gTreemap->namesOfGroups[i]);
574 catch(exception& e) {
575 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
579 cout << "An unknown error has occurred in the Tree class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
585 /*****************************************************************/
// Iterates over every node of the tree by index.
// NOTE(review): the loop body is not visible in this excerpt, so what is printed per node is not documented here.
587 void Tree::printTree() {
589 for(int i=0;i<numNodes;i++){
596 /*****************************************************************/