5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 /*****************************************************************/
// Tree constructor body. The signature line is not part of this listing; the error
// messages below name "Function Tree", so this is presumably Tree::Tree() - TODO confirm.
// NOTE(review): this listing omits some source lines (try, braces, the non-leaf branch
// body); the comments here describe only the statements that are visible.
//
// Sizes the node vector from the sequence count held by the global treemap and
// fills in every leaf node with its sequence name and group.
17 globaldata = GlobalData::getInstance();
18 numLeaves = globaldata->gTreemap->getNumSeqs();
// Node count is derived as 2*numLeaves - 1 (leaves first, then internal nodes).
// NOTE(review): if getNumSeqs() returns 0 this evaluates to -1 before being passed to
// resize(); presumably callers guarantee at least one sequence - verify.
19 numNodes = 2*numLeaves - 1;
21 tree.resize(numNodes);
23 //initialize tree with correct number of nodes, name and group info.
24 for (int i = 0; i < numNodes; i++) {
25 //initialize leaf nodes
// Indices 0 .. numLeaves-1 are the leaves; leaf i takes namesOfSeqs[i] from the treemap.
26 if (i <= (numLeaves-1)) {
27 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29 //set pcount and pGroup for groupname to 1.
// Both maps are keyed by the leaf's group name; the same getGroup() lookup is repeated
// for each of the three uses on lines 28-31.
30 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
31 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
32 //Treemap knows name, group and index to speed up search
33 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
35 //initialize non leaf nodes
// NOTE(review): this condition is the exact complement of the if above, so a plain
// else would be equivalent. The statements of this branch are not visible here.
36 }else if (i > (numLeaves-1)) {
// Error handlers: the message is written to cout. What follows the message (rethrow,
// exit, ...) is not visible in this listing.
43 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
47 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52 /*****************************************************************/
// Re-initializes the tree from the global treemap: recomputes numLeaves/numNodes,
// resizes the node vector and re-labels every leaf with its sequence name and group.
// The visible statements mirror the constructor body, except that globaldata is not
// re-fetched here.
// NOTE(review): this listing omits some source lines (try, braces, the non-leaf branch
// body); the comments here describe only the statements that are visible.
53 void Tree::resetTree(){
55 numLeaves = globaldata->gTreemap->getNumSeqs();
// NOTE(review): evaluates to -1 when there are no sequences; presumably callers
// guarantee at least one - verify.
56 numNodes = 2*numLeaves - 1;
58 tree.resize(numNodes);
60 //initialize tree with correct number of nodes, name and group info.
61 for (int i = 0; i < numNodes; i++) {
62 //initialize leaf nodes
// Indices 0 .. numLeaves-1 are the leaves; leaf i takes namesOfSeqs[i] from the treemap.
63 if (i <= (numLeaves-1)) {
64 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
65 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
66 //set pcount and pGroup for groupname to 1.
// NOTE(review): only the entry for the leaf's current group is assigned here; whether
// stale entries left in pcount/pGroups from an earlier state are cleared elsewhere
// cannot be seen from this listing.
67 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
68 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
69 //Treemap knows name, group and index to speed up search
70 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
72 //initialize non leaf nodes
// NOTE(review): exact complement of the if above, so a plain else would be equivalent.
// The statements of this branch are not visible here.
73 }else if (i > (numLeaves-1)) {
// Error handlers: the message is written to cout; what follows is not visible here.
80 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function resetTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
84 cout << "An unknown error has occurred in the Tree class function resetTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
89 /*****************************************************************/
// Looks up the node index recorded for a sequence name.
// The lookup is delegated to the global treemap rather than scanning the node vector.
// searchName: sequence name to look up (passed by value).
// The return statement is not visible in this listing; presumably the looked-up
// index is returned unchanged - TODO confirm.
90 int Tree::getIndex(string searchName) {
92 //Treemap knows name, group and index to speed up search
93 // getIndex function will return the vector index or -1 if seq is not found.
94 int index = globaldata->gTreemap->getIndex(searchName);
// Error handlers: the message is written to cout; what follows is not visible here.
99 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
103 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
107 /*****************************************************************/
// Records, in the global treemap, the node index that belongs to a sequence name.
// searchName: sequence name whose entry is updated (passed by value).
// index:      node-vector index to store for that name.
// Only the treemap is touched by the visible code; no node in `tree` is modified.
109 void Tree::setIndex(string searchName, int index) {
111 //set index in treemap
112 globaldata->gTreemap->setIndex(searchName, index);
// Error handlers: the message is written to cout; what follows is not visible here.
114 catch(exception& e) {
115 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
119 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
123 /*****************************************************************/
// Fills in the derived per-node maps of every internal node (indices numLeaves ..
// numNodes-1) by merging the maps of its two children.
// Nodes are processed in increasing index order, and each merge reads the children's
// already-stored maps; this presumably relies on children always having a lower index
// than their parent - verify against the code that builds the topology.
124 void Tree::assembleTree() {
126 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
127 for (int i = numLeaves; i < numNodes; i++) {
// pGroups: parsimony group set from mergeGroups(); pcount: summed group counts from
// mergeGcounts(). Both return a map by value that is copied into the node.
128 tree[i].pGroups = (mergeGroups(i));
129 tree[i].pcount = (mergeGcounts(i));
// Error handlers: the message is written to cout; what follows is not visible here.
132 catch(exception& e) {
133 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
137 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
141 /*****************************************************************/
// Copies node data FROM the tree pointed to by `copy` INTO this tree (despite the
// parameter name, `copy` is the source and *this is the destination).
// copy: source tree; it is dereferenced without a null check, and its node vector is
//       indexed up to this->numNodes - 1, so it must hold at least that many nodes.
// Per node, the name, group, branch length, parent, children, index, pGroups and
// pcount are copied.
142 void Tree::getCopy(Tree* copy) {
145 //for each node in the tree copy its info
// The loop bound is this tree's numNodes, not the source tree's.
146 for (int i = 0; i < numNodes; i++) {
148 tree[i].setName(copy->tree[i].getName());
151 tree[i].setGroup(copy->tree[i].getGroup());
154 tree[i].setBranchLength(copy->tree[i].getBranchLength());
157 tree[i].setParent(copy->tree[i].getParent());
160 tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
162 //copy index in node and tmap
163 tree[i].setIndex(copy->tree[i].getIndex());
// NOTE(review): getIndex() and setIndex() of this class both go through
// globaldata->gTreemap, so this line stores back the value it has just read for the
// same name. It looks like a no-op unless the treemap's setIndex has other effects -
// confirm whether copy->tree[i].getIndex() was intended as the value.
164 setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
167 tree[i].pGroups = copy->tree[i].pGroups;
170 tree[i].pcount = copy->tree[i].pcount;
// Error handlers: the message is written to cout; what follows is not visible here.
173 catch(exception& e) {
174 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
182 /*****************************************************************/
183 //returns a map with a groupname and the number of times that group was seen in the children
184 //for instance if your children are white and black then it would return a map with 2 entries
185 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
186 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Builds the parsimony group map for node i from the pGroups maps of its two children.
// i: index of the node whose children are merged; both child indices are used to
//    index `tree` directly, so the node must have two valid children.
// Returns a map<string,int> by value (the return statement itself is not visible in
// this listing; presumably `parsimony` is returned - TODO confirm).
// NOTE(review): this listing omits some source lines, including the declarations of
// it, it2 and maxPars and the conditions guarding the last two loops; the comments
// here describe only what is visible.
188 map<string, int> Tree::mergeGroups(int i) {
190 int lc = tree[i].getLChild();
191 int rc = tree[i].getRChild();
// NOTE(review): debug output - prints i, lc and rc with no separators to cout on
// every call. Looks like a leftover trace; consider removing.
192 cout << i << lc << rc << endl;
193 //set parsimony groups to left child
194 map<string,int> parsimony = tree[lc].pGroups;
198 //look at right child groups and update maxPars if right child has something higher for that group.
199 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
// A group present in both children has its count incremented; a group seen only in the
// right child is inserted with count 1.
200 it2 = parsimony.find(it->first);
201 if (it2 != parsimony.end()) {
202 parsimony[it->first]++;
204 parsimony[it->first] = 1;
// Track the largest merged count seen so far.
207 if(parsimony[it->first] > maxPars){
208 maxPars = parsimony[it->first];
212 // this is true if right child had a greater parsimony for a certain group
214 //erase all the groups that are only 1 because you found something with 2.
215 for(it=parsimony.begin();it!=parsimony.end();it++){
// NOTE(review): this erases, by key, the very element `it` refers to while the loop is
// still iterating over `parsimony`. std::map::erase invalidates iterators to the
// erased element, so the following it++ is undefined behaviour. The usual fix is
// to advance first (parsimony.erase(it++)) or use the iterator returned by erase.
217 parsimony.erase(it->first);
221 //set one remaining groups to 1
222 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
223 for(it=parsimony.begin();it!=parsimony.end();it++){
224 parsimony[it->first] = 1;
// Error handlers: the message is written to cout; what follows is not visible here.
231 catch(exception& e) {
232 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
236 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
240 /*****************************************************************/
241 //returns a map with a groupname and the number of times that group was seen in the children
242 //for instance if your children are white and black then it would return a map with 2 entries
243 // p[white] = 1 and p[black] = 1. Now go up a level and merge that with a node who has p[white] = 1
244 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
// Variant of the parsimony merge that first drops, from both children of node i, every
// group that inUsersGroups() reports as not being in g, and then merges what is left.
// i: index of the node whose children are merged.
// g: group names the user wants analysed (passed by value).
// Returns a map<string,int> by value (the return statement is not visible in this
// listing; presumably `parsimony` is returned - TODO confirm).
// NOTE(review): the filtering below erases entries from tree[lc].pGroups and
// tree[rc].pGroups themselves, so the child nodes are modified by this call.
// NOTE(review): this listing omits some source lines, including the declarations of
// it, it2 and maxPars and the conditions guarding the last two loops.
246 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
249 int lc = tree[i].getLChild();
250 int rc = tree[i].getRChild();
252 //loop through nodes groups removing the ones the user doesn't want
// NOTE(review): erasing the element `it` refers to and then executing it++ is undefined
// behaviour (std::map::erase invalidates iterators to the erased element).
253 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
254 if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
257 //loop through nodes groups removing the ones the user doesn't want
// NOTE(review): same erase-while-iterating hazard as the loop over the left child.
258 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
259 if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
262 //set parsimony groups to left child
263 map<string,int> parsimony = tree[lc].pGroups;
267 //look at right child groups and update maxPars if right child has something higher for that group.
268 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
// A group present in both children has its count incremented; a group seen only in the
// right child is inserted with count 1.
269 it2 = parsimony.find(it->first);
270 if (it2 != parsimony.end()) {
271 parsimony[it->first]++;
273 parsimony[it->first] = 1;
// Track the largest merged count seen so far.
276 if(parsimony[it->first] > maxPars){
277 maxPars = parsimony[it->first];
281 // this is true if right child had a greater parsimony for a certain group
283 //erase all the groups that are only 1 because you found something with 2.
284 for(it=parsimony.begin();it!=parsimony.end();it++){
// NOTE(review): erases the element `it` refers to while iterating `parsimony`; the
// following it++ is undefined behaviour.
286 parsimony.erase(it->first);
290 //set one remaining groups to 1
291 //so with our above example p[white] = 2 would be left and it would become p[white] = 1
292 for(it=parsimony.begin();it!=parsimony.end();it++){
293 parsimony[it->first] = 1;
// NOTE(review): both messages below report "mergeGroups" although this function is
// mergeUserGroups - looks like a copy-paste slip that will misdirect debugging.
// What follows each message is not visible in this listing.
300 catch(exception& e) {
301 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
305 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
311 /**************************************************************************************************/
// Adds together the per-group counts (pcount) of the two children of a node.
// position: index of the node whose children are summed.
// Returns a map<string,int> by value (the return statement is not visible in this
// listing; presumably `sum` is returned - TODO confirm).
313 map<string,int> Tree::mergeGcounts(int position) {
// NOTE(review): `pos` is not used by any statement visible in this listing.
315 map<string,int>::iterator pos;
317 int lc = tree[position].getLChild();
318 int rc = tree[position].getRChild();
// Start from a copy of the left child's counts ...
320 map<string,int> sum = tree[lc].pcount;
// ... then add the right child's count for each group; operator[] creates a
// zero-valued entry for groups that only the right child has.
322 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
323 sum[it->first] += it->second;
// Error handlers: the message is written to cout; what follows is not visible here.
327 catch(exception& e) {
328 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
332 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
336 /**************************************************************************************************/
// Randomly swaps leaf labels (name, group, pGroups, pcount) between leaves, restricted
// to leaves whose group is accepted by inUsersGroups() for g.
// g: group names the user wants analysed (passed by value).
// Uses rand(), so results depend on the seeding done elsewhere.
// NOTE(review): the declarations of z, treez and treei are not visible in this listing.
338 void Tree::randomLabels(vector<string> g) {
341 for(int i = 0; i < numLeaves; i++){
343 //get random index to switch with
// Scales rand() by (i+1)/(RAND_MAX+1) and truncates, which is intended to give an
// index in the range 0 .. i.
344 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
346 //you only want to randomize the nodes that are from a group the user wants analyzed, so
347 //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
350 treez = inUsersGroups(tree[z].getGroup(), g);
351 treei = inUsersGroups(tree[i].getGroup(), g);
// When either leaf is outside the user's groups the pair is left untouched; no
// replacement index is drawn for that i.
353 if ((treez == true) && (treei == true)) {
354 //switches node i and node z's info.
355 map<string,int> lib_hold = tree[z].pGroups;
356 tree[z].pGroups = (tree[i].pGroups);
357 tree[i].pGroups = (lib_hold);
359 string zgroup = tree[z].getGroup();
360 tree[z].setGroup(tree[i].getGroup());
361 tree[i].setGroup(zgroup);
363 string zname = tree[z].getName();
364 tree[z].setName(tree[i].getName());
365 tree[i].setName(zname);
367 map<string,int> gcount_hold = tree[z].pcount;
368 tree[z].pcount = (tree[i].pcount);
369 tree[i].pcount = (gcount_hold);
// Error handlers: the message is written to cout; what follows is not visible here.
373 catch(exception& e) {
374 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
378 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
382 /**************************************************************************************************/
// Pairwise overload: reassigns the group labels of all leaves currently in groupA or
// groupB from a shuffled pool that holds seqsPerGroup[groupA] copies of groupA and
// seqsPerGroup[groupB] copies of groupB. Leaves in other groups are left alone.
// groupA, groupB: the two group names to shuffle between (passed by value).
384 void Tree::randomLabels(string groupA, string groupB) {
// Pool sizes come from the treemap's per-group sequence counts.
// NOTE(review): if seqsPerGroup is a std::map, operator[] inserts a zero entry for an
// unknown group name - confirm the container type.
386 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
387 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
// Every slot starts as groupA; the tail from numSeqsA onward is overwritten with groupB.
389 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
// NOTE(review): compares a signed int with the unsigned size().
390 for(int i=numSeqsA;i<randomGroups.size();i++){
391 randomGroups[i] = groupB;
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in C++17;
// std::shuffle with an explicit engine is the replacement if the toolchain moves up.
393 random_shuffle(randomGroups.begin(), randomGroups.end());
395 int randomCounter = 0;
396 for(int i=0;i<numLeaves;i++){
397 if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
// The leaf takes the next label from the pool and its two maps are reset to contain
// only that label with value 1.
// NOTE(review): no increment of randomCounter is visible in this listing; presumably
// it is advanced on an omitted line - verify, otherwise every leaf reads slot 0.
398 tree[i].setGroup(randomGroups[randomCounter]);
399 tree[i].pcount.clear();
400 tree[i].pcount[randomGroups[randomCounter]] = 1;
401 tree[i].pGroups.clear();
402 tree[i].pGroups[randomGroups[randomCounter]] = 1;
// Error handlers: the message is written to cout; what follows is not visible here.
407 catch(exception& e) {
408 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
412 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
416 /**************************************************************************************************/
// Shuffles branch lengths among all nodes (leaves and internal nodes alike); names,
// groups and topology are not touched.
// Walks i from the last node down to 0 and swaps node i's branch length with that of
// a randomly drawn node z (a Fisher-Yates style pass). Uses rand().
417 void Tree::randomBlengths() {
419 for(int i=numNodes-1;i>=0;i--){
// Scales rand() by (i+1)/(RAND_MAX+1) and truncates, intended to give z in 0 .. i.
420 int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));
// Three-step swap through a temporary.
422 float bl_hold = tree[z].getBranchLength();
423 tree[z].setBranchLength(tree[i].getBranchLength());
424 tree[i].setBranchLength(bl_hold);
// Error handlers: the message is written to cout; what follows is not visible here.
427 catch(exception& e) {
428 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
432 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
436 /*************************************************************************************************/
// Overload taking the list of user-selected groups (passed by value).
// NOTE(review): the body of this function is not visible in this listing; presumably
// it randomizes labels for the groups in g and then rebuilds the derived node data -
// TODO confirm against the full source.
437 void Tree::assembleRandomUnifracTree(vector<string> g) {
441 /*************************************************************************************************/
// Pairwise overload: randomizes the group labels of the leaves in groupA/groupB via
// randomLabels(groupA, groupB).
// NOTE(review): any statements after the randomLabels call are not visible in this
// listing; presumably the internal-node data is rebuilt afterwards - TODO confirm.
442 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
443 randomLabels(groupA, groupB);
447 /*************************************************************************************************/
448 //for now it's just random topology, but it may become random labels as well later; that's why this is such a simple function now...
// Entry point for building a randomized tree.
// NOTE(review): the body of this function is not visible in this listing; presumably
// it calls randomTopology() and then rebuilds the derived node data - TODO confirm.
449 void Tree::assembleRandomTree() {
453 /**************************************************************************************************/
// Discards the current parent/child links and rebuilds a random topology: each internal
// node i (numLeaves .. numNodes-1) adopts two distinct, currently parentless nodes.
// Leaf names, groups and branch lengths are not touched by the visible statements.
// Uses rand().
// NOTE(review): this listing omits the retry-loop headers around the two random draws
// and the declaration of `escape`; the comments describe only what is visible.
455 void Tree::randomTopology() {
// Step 1: detach every node from its parent.
457 for(int i=0;i<numNodes;i++){
458 tree[i].setParent(-1);
// Step 2: clear the children of every internal node.
460 for(int i=numLeaves;i<numNodes;i++){
461 tree[i].setChildren(-1, -1);
// Step 3: give each internal node two parentless children.
464 for(int i=numLeaves;i<numNodes;i++){
466 int rnd_index1, rnd_index2;
// NOTE(review): rand()/RAND_MAX can be exactly 1.0, in which case the draw equals i
// itself. Node i is still parentless at this point, so it could be accepted as its own
// child. Dividing by (RAND_MAX + 1.0), as randomBlengths does, would keep the draw
// strictly below i.
468 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
// First candidate is accepted once it has no parent yet.
469 if(tree[rnd_index1].getParent() == -1){escape = 1;}
// Second candidate must differ from the first and also be parentless (same boundary
// caveat as for rnd_index1).
474 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
475 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
// Link the pair under node i; node i itself stays parentless until a later node
// adopts it.
480 tree[i].setChildren(rnd_index1,rnd_index2);
481 tree[i].setParent(-1);
482 tree[rnd_index1].setParent(i);
483 tree[rnd_index2].setParent(i);
// Error handlers: the message is written to cout; what follows is not visible here.
486 catch(exception& e) {
487 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
491 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
496 /*****************************************************************/
497 // This prints out the tree in Newick form.
// Writes the tree out as a Newick file.
// f: passed by value; presumably the output file name - the statement that uses it
//    (likely an assignment to `filename`) is not visible in this listing, TODO confirm.
// The visible statements locate the root with findRoot() and open the output stream
// `out` on `filename` through the openOutputFile helper; the statements that emit the
// tree text and close the file are not visible here.
498 void Tree::createNewickFile(string f) {
500 int root = findRoot();
501 //filename = getRootName(globaldata->getTreeFile()) + "newick";
503 openOutputFile(filename, out);
507 // you are at the end of the tree
// Error handlers: the message is written to cout; what follows is not visible here.
511 catch(exception& e) {
512 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
516 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
521 /*****************************************************************/
522 //This function finds the index of the root node.
// Scans the nodes in index order and returns the index of the first one whose parent
// is -1, i.e. the first parentless node.
// NOTE(review): what is returned when no node is parentless is not visible in this
// listing - confirm the fall-through value before relying on it.
524 int Tree::findRoot() {
526 for (int i = 0; i < numNodes; i++) {
// A parent of -1 marks a node that has no parent.
528 if (tree[i].getParent() == -1) { return i; }
// Error handlers: the message is written to cout; what follows is not visible here.
532 catch(exception& e) {
533 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
537 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
542 /*****************************************************************/
// Recursively writes the subtree rooted at `node` to the stream `out`.
// node: index of the subtree root.
// An internal node recurses into its left child and then its right child; a leaf
// writes "<group>:<branch length>".
// NOTE(review): the statements that emit the surrounding punctuation for internal
// nodes are not visible in this listing.
543 void Tree::printBranch(int node) {
546 // you are not a leaf
// A left-child index of -1 is what identifies a leaf.
547 if (tree[node].getLChild() != -1) {
549 printBranch(tree[node].getLChild());
551 printBranch(tree[node].getRChild());
553 }else { //you are a leaf
// NOTE(review): the leaf label written here is the node's group, not its sequence
// name - confirm that this is intended for the output file.
554 out << tree[node].getGroup() << ":" << tree[node].getBranchLength();
// Error handlers: the message is written to cout; what follows is not visible here.
558 catch(exception& e) {
559 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
563 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
568 /*****************************************************************/
// Iterates over every node of the tree in index order.
// NOTE(review): the loop body is not visible in this listing; presumably it prints
// each node's data for debugging - TODO confirm.
570 void Tree::printTree() {
572 for(int i=0;i<numNodes;i++){
579 /*****************************************************************/