]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
added bootstrap.shared command and fixed some bugs with heatmap
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 numLeaves = globaldata->Treenames.size();
19                 numNodes = 2*numLeaves - 1;
20                 
21                 tree.resize(numNodes);
22
23                 //initialize tree with correct number of nodes, name and group info.
24                 for (int i = 0; i < numNodes; i++) {
25                         //initialize leaf nodes
26                         if (i <= (numLeaves-1)) {
27                                 tree[i].setName(globaldata->Treenames[i]);
28                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
29                                 //set pcount and pGroup for groupname to 1.
30                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
31                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
32                                 //Treemap knows name, group and index to speed up search
33                                 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
34         
35                         //intialize non leaf nodes
36                         }else if (i > (numLeaves-1)) {
37                                 tree[i].setName("");
38                                 tree[i].setGroup("");
39                         }
40                 }
41         }
42         catch(exception& e) {
43                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
44                 exit(1);
45         }
46         catch(...) {
47                 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }               
50 }
51
52 /*****************************************************************/
53 Tree::~Tree() {}
54 /*****************************************************************/
55 int Tree::getIndex(string searchName) {
56         try {
57                 //Treemap knows name, group and index to speed up search
58                 // getIndex function will return the vector index or -1 if seq is not found.
59                 int index = globaldata->gTreemap->getIndex(searchName);
60                 return index;
61                 
62         }
63         catch(exception& e) {
64                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
65                 exit(1);
66         }
67         catch(...) {
68                 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69                 exit(1);
70         }               
71 }
72 /*****************************************************************/
73
74 void Tree::setIndex(string searchName, int index) {
75         try {
76                 //set index in treemap
77                 globaldata->gTreemap->setIndex(searchName, index);
78         }
79         catch(exception& e) {
80                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
81                 exit(1);
82         }
83         catch(...) {
84                 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
85                 exit(1);
86         }               
87 }
88 /*****************************************************************/
89 void Tree::assembleTree() {
90         try {
91                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
92                 for (int i = numLeaves; i < numNodes; i++) {
93                         tree[i].pGroups = (mergeGroups(i));
94                         tree[i].pcount = (mergeGcounts(i));
95                 }
96         }
97         catch(exception& e) {
98                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
99                 exit(1);
100         }
101         catch(...) {
102                 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
103                 exit(1);
104         }               
105 }
106 /*****************************************************************/
107 void Tree::getCopy(Tree* copy) {
108         try {
109         
110                 //for each node in the tree copy its info
111                 for (int i = 0; i < numNodes; i++) {
112                         //copy name
113                         tree[i].setName(copy->tree[i].getName());
114                 
115                         //copy group
116                         tree[i].setGroup(copy->tree[i].getGroup());
117                         
118                         //copy branch length
119                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
120                 
121                         //copy parent
122                         tree[i].setParent(copy->tree[i].getParent());
123                 
124                         //copy children
125                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
126                 
127                         //copy index in node and tmap
128                         tree[i].setIndex(copy->tree[i].getIndex());
129                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
130                         
131                         //copy pGroups
132                         tree[i].pGroups = copy->tree[i].pGroups;
133                 
134                         //copy pcount
135                         tree[i].pcount = copy->tree[i].pcount;
136                 }
137         }
138         catch(exception& e) {
139                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
140                 exit(1);
141         }
142         catch(...) {
143                 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
144                 exit(1);
145         }               
146 }
147 /*****************************************************************/
148 //returns a map with a groupname and the number of times that group was seen in the children
149 //for instance if your children are white and black then it would return a map with 2 entries
150 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
151 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
152
153 map<string, int> Tree::mergeGroups(int i) {
154         try {
155                 int lc = tree[i].getLChild();
156                 int rc = tree[i].getRChild();
157
158                 //set parsimony groups to left child
159                 map<string,int> parsimony = tree[lc].pGroups;
160                 
161                 int maxPars = 1;
162
163                 //look at right child groups and update maxPars if right child has something higher for that group.
164                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
165                         it2 = parsimony.find(it->first);
166                         if (it2 != parsimony.end()) {
167                                 parsimony[it->first]++;
168                         }else {
169                                 parsimony[it->first] = 1;
170                         }
171                         
172                         if(parsimony[it->first] > maxPars){
173                                 maxPars = parsimony[it->first];
174                         }
175                 }
176         
177                 // this is true if right child had a greater parsimony for a certain group
178                 if(maxPars > 1){
179                         //erase all the groups that are only 1 because you found something with 2.
180                         for(it=parsimony.begin();it!=parsimony.end();it++){
181                                 if(it->second == 1){
182                                         parsimony.erase(it->first);
183 //                                      it--;
184                                 }
185                         }
186                         //set one remaining groups to 1
187                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
188                         for(it=parsimony.begin();it!=parsimony.end();it++){
189                                 parsimony[it->first] = 1;
190                         }
191                 
192                 }
193         
194                 return parsimony;
195         }
196         catch(exception& e) {
197                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
198                 exit(1);
199         }
200         catch(...) {
201                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
202                 exit(1);
203         }               
204 }
205 /*****************************************************************/
206 //returns a map with a groupname and the number of times that group was seen in the children
207 //for instance if your children are white and black then it would return a map with 2 entries
208 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
209 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
210
211 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
212         try {
213         
214                 int lc = tree[i].getLChild();
215                 int rc = tree[i].getRChild();
216                 
217                 //loop through nodes groups removing the ones the user doesn't want
218                 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
219                         if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
220                 }
221                 
222                 //loop through nodes groups removing the ones the user doesn't want
223                 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
224                         if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
225                 }
226
227                 //set parsimony groups to left child
228                 map<string,int> parsimony = tree[lc].pGroups;
229                 
230                 int maxPars = 1;
231
232                 //look at right child groups and update maxPars if right child has something higher for that group.
233                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
234                         it2 = parsimony.find(it->first);
235                         if (it2 != parsimony.end()) {
236                                 parsimony[it->first]++;
237                         }else {
238                                 parsimony[it->first] = 1;
239                         }
240                         
241                         if(parsimony[it->first] > maxPars){
242                                 maxPars = parsimony[it->first];
243                         }
244                 }
245                         
246                 // this is true if right child had a greater parsimony for a certain group
247                 if(maxPars > 1){
248                         //erase all the groups that are only 1 because you found something with 2.
249                         for(it=parsimony.begin();it!=parsimony.end();it++){
250                                 if(it->second == 1){
251                                         parsimony.erase(it->first);
252                                 }
253                         }
254                         for(it=parsimony.begin();it!=parsimony.end();it++){
255                                 parsimony[it->first] = 1;
256                         }
257                 }               
258                 
259                 return parsimony;
260         }
261         catch(exception& e) {
262                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
263                 exit(1);
264         }
265         catch(...) {
266                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
267                 exit(1);
268         }               
269 }
270
271
272 /**************************************************************************************************/
273
274 map<string,int> Tree::mergeGcounts(int position) {
275         try{
276                 map<string,int>::iterator pos;
277         
278                 int lc = tree[position].getLChild();
279                 int rc = tree[position].getRChild();
280         
281                 map<string,int> sum = tree[lc].pcount;
282     
283                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
284                         sum[it->first] += it->second;
285                 }
286                 return sum;
287         }
288         catch(exception& e) {
289                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
290                 exit(1);
291         }
292         catch(...) {
293                 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
294                 exit(1);
295         }               
296 }
297 /**************************************************************************************************/
298
299 void Tree::randomLabels(vector<string> g) {
300         try {
301                 
302                 for(int i = 0; i < numLeaves; i++){
303                         int z;
304                         //get random index to switch with
305                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
306                         
307                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
308                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
309                         bool treez, treei;
310                 
311                         treez = inUsersGroups(tree[z].getGroup(), g);
312                         treei = inUsersGroups(tree[i].getGroup(), g);
313                         
314                         if ((treez == true) && (treei == true)) {
315                                 //switches node i and node z's info.
316                                 map<string,int> lib_hold = tree[z].pGroups;
317                                 tree[z].pGroups = (tree[i].pGroups);
318                                 tree[i].pGroups = (lib_hold);
319                                 
320                                 string zgroup = tree[z].getGroup();
321                                 tree[z].setGroup(tree[i].getGroup());
322                                 tree[i].setGroup(zgroup);
323                                 
324                                 string zname = tree[z].getName();
325                                 tree[z].setName(tree[i].getName());
326                                 tree[i].setName(zname);
327                                 
328                                 map<string,int> gcount_hold = tree[z].pcount;
329                                 tree[z].pcount = (tree[i].pcount);
330                                 tree[i].pcount = (gcount_hold);
331                         }
332                 }
333         }
334         catch(exception& e) {
335                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
336                 exit(1);
337         }
338         catch(...) {
339                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
340                 exit(1);
341         }               
342 }
343 /**************************************************************************************************/
344
345 void Tree::randomLabels(string groupA, string groupB) {
346         try {
347                 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
348                 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
349
350                 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
351                 for(int i=numSeqsA;i<randomGroups.size();i++){
352                         randomGroups[i] = groupB;
353                 }
354                 random_shuffle(randomGroups.begin(), randomGroups.end());
355                                 
356                 int randomCounter = 0;                          
357                 for(int i=0;i<numLeaves;i++){
358                         if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
359                                 tree[i].setGroup(randomGroups[randomCounter]);
360                                 tree[i].pcount.clear();
361                                 tree[i].pcount[randomGroups[randomCounter]] = 1;
362                                 tree[i].pGroups.clear();
363                                 tree[i].pGroups[randomGroups[randomCounter]] = 1;
364                                 randomCounter++;
365                         }
366                 }
367         }               
368         catch(exception& e) {
369                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
370                 exit(1);
371         }
372         catch(...) {
373                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
374                 exit(1);
375         }               
376 }
377 /**************************************************************************************************/
378 void Tree::randomBlengths()  {
379         try {
380                 for(int i=numNodes-1;i>=0;i--){
381                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
382                 
383                         float bl_hold = tree[z].getBranchLength();
384                         tree[z].setBranchLength(tree[i].getBranchLength());
385                         tree[i].setBranchLength(bl_hold);
386                 }
387         }
388         catch(exception& e) {
389                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
390                 exit(1);
391         }
392         catch(...) {
393                 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
394                 exit(1);
395         }               
396 }
397 /*************************************************************************************************/
398 void Tree::assembleRandomUnifracTree(vector<string> g) {
399         randomLabels(g);
400         assembleTree();
401 }
402 /*************************************************************************************************/
403 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
404         randomLabels(groupA, groupB);
405         assembleTree();
406 }
407
408 /*************************************************************************************************/
409 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
410 void Tree::assembleRandomTree() {
411         randomTopology();
412         assembleTree();
413 }
414 /**************************************************************************************************/
415
416 void Tree::randomTopology() {
417         try {
418                 for(int i=0;i<numNodes;i++){
419                         tree[i].setParent(-1);
420                 }
421                 for(int i=numLeaves;i<numNodes;i++){
422                         tree[i].setChildren(-1, -1); 
423                 }
424     
425                 for(int i=numLeaves;i<numNodes;i++){
426                         int escape =0;
427                         int rnd_index1, rnd_index2;
428                         while(escape == 0){
429                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
430                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
431                         }
432                 
433                         escape = 0;
434                         while(escape == 0){
435                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
436                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
437                                         escape = 1;
438                                 }               
439                         }
440                 
441                         tree[i].setChildren(rnd_index1,rnd_index2);
442                         tree[i].setParent(-1);
443                         tree[rnd_index1].setParent(i);
444                         tree[rnd_index2].setParent(i);
445                 }
446         }
447         catch(exception& e) {
448                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
449                 exit(1);
450         }
451         catch(...) {
452                 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
453                 exit(1);
454         }               
455 }
456 /*****************************************************************/
457 void Tree::print(ostream& out) {
458         try {
459                 int root = findRoot();
460                 printBranch(root, out);
461                 out << ";" << endl;
462         }
463         catch(exception& e) {
464                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
465                 exit(1);
466         }
467         catch(...) {
468                 cout << "An unknown error has occurred in the Tree class function print. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
469                 exit(1);
470         }               
471 }
472 /*****************************************************************/
473 // This prints out the tree in Newick form.
474 void Tree::createNewickFile(string f) {
475         try {
476                 int root = findRoot();
477                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
478                 filename = f;
479                 openOutputFile(filename, out);
480                 
481                 printBranch(root, out);
482                 
483                 // you are at the end of the tree
484                 out << ";" << endl;
485                 out.close();
486         }
487         catch(exception& e) {
488                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
489                 exit(1);
490         }
491         catch(...) {
492                 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
493                 exit(1);
494         }               
495 }
496
497 /*****************************************************************/
498 //This function finds the index of the root node.
499
500 int Tree::findRoot() {
501         try {
502                 for (int i = 0; i < numNodes; i++) {
503                         //you found the root
504                         if (tree[i].getParent() == -1) { return i; }  
505                 }
506                 return -1;
507         }
508         catch(exception& e) {
509                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
510                 exit(1);
511         }
512         catch(...) {
513                 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
514                 exit(1);
515         }               
516 }
517
518 /*****************************************************************/
519 void Tree::printBranch(int node, ostream& out) {
520         try {
521                 
522                 // you are not a leaf
523                 if (tree[node].getLChild() != -1) {
524                         out << "(";
525                         printBranch(tree[node].getLChild(), out);
526                         out << ",";
527                         printBranch(tree[node].getRChild(), out);
528                         out << ")";
529                         //if there is a branch length then print it
530                         if (tree[node].getBranchLength() != -1) {
531                                 out << ":" << tree[node].getBranchLength();
532                         }
533                 }else { //you are a leaf
534                         out << tree[node].getGroup(); 
535                         //if there is a branch length then print it
536                         if (tree[node].getBranchLength() != -1) {
537                                 out << ":" << tree[node].getBranchLength();
538                         }
539                 }
540                 
541         }
542         catch(exception& e) {
543                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
544                 exit(1);
545         }
546         catch(...) {
547                 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
548                 exit(1);
549         }               
550 }
551
552 /*****************************************************************/
553
554 void Tree::printTree() {
555         
556         for(int i=0;i<numNodes;i++){
557                 cout << i << '\t';
558                 tree[i].printNode();
559         }
560         
561 }
562
563 /*****************************************************************/
564
565