]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
fixed bug in read tree
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16         
17                 globaldata = GlobalData::getInstance();
18                 numLeaves = globaldata->gTreemap->getNumSeqs();
19                 numNodes = 2*numLeaves - 1;
20                 
21                 tree.resize(numNodes);
22
23                 //initialize tree with correct number of nodes, name and group info.
24                 for (int i = 0; i < numNodes; i++) {
25                         //initialize leaf nodes
26                         if (i <= (numLeaves-1)) {
27                                 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29                                 //set pcount and pGroup for groupname to 1.
30                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
31                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
32                                 //Treemap knows name, group and index to speed up search
33                                 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
34         
35                         //intialize non leaf nodes
36                         }else if (i > (numLeaves-1)) {
37                                 tree[i].setName("");
38                                 tree[i].setGroup("");
39                         }
40                 }
41         }
42         catch(exception& e) {
43                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
44                 exit(1);
45         }
46         catch(...) {
47                 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }               
50 }
51
52 /*****************************************************************/
53 int Tree::getIndex(string searchName) {
54         try {
55                 //Treemap knows name, group and index to speed up search
56                 // getIndex function will return the vector index or -1 if seq is not found.
57                 int index = globaldata->gTreemap->getIndex(searchName);
58                 return index;
59                 
60         }
61         catch(exception& e) {
62                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
63                 exit(1);
64         }
65         catch(...) {
66                 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
67                 exit(1);
68         }               
69 }
70 /*****************************************************************/
71
72 void Tree::setIndex(string searchName, int index) {
73         try {
74                 //set index in treemap
75                 globaldata->gTreemap->setIndex(searchName, index);
76         }
77         catch(exception& e) {
78                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
79                 exit(1);
80         }
81         catch(...) {
82                 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
83                 exit(1);
84         }               
85 }
86 /*****************************************************************/
87 void Tree::assembleTree() {
88         try {
89                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
90                 for (int i = numLeaves; i < numNodes; i++) {
91                         tree[i].pGroups = (mergeGroups(i));
92                         tree[i].pcount = (mergeGcounts(i));
93                 }
94         }
95         catch(exception& e) {
96                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
97                 exit(1);
98         }
99         catch(...) {
100                 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
101                 exit(1);
102         }               
103 }
104 /*****************************************************************/
105 void Tree::getCopy(Tree* copy) {
106         try {
107         
108                 //for each node in the tree copy its info
109                 for (int i = 0; i < numNodes; i++) {
110                         //copy name
111                         tree[i].setName(copy->tree[i].getName());
112                 
113                         //copy group
114                         tree[i].setGroup(copy->tree[i].getGroup());
115                         
116                         //copy branch length
117                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
118                 
119                         //copy parent
120                         tree[i].setParent(copy->tree[i].getParent());
121                 
122                         //copy children
123                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
124                 
125                         //copy index in node and tmap
126                         tree[i].setIndex(copy->tree[i].getIndex());
127                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
128                         
129                         //copy pGroups
130                         tree[i].pGroups = copy->tree[i].pGroups;
131                 
132                         //copy pcount
133                         tree[i].pcount = copy->tree[i].pcount;
134                 }
135         }
136         catch(exception& e) {
137                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }
140         catch(...) {
141                 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
142                 exit(1);
143         }               
144 }
145 /*****************************************************************/
146 //returns a map with a groupname and the number of times that group was seen in the children
147 //for instance if your children are white and black then it would return a map with 2 entries
148 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
149 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
150
151 map<string, int> Tree::mergeGroups(int i) {
152         try {
153                 int lc = tree[i].getLChild();
154                 int rc = tree[i].getRChild();
155
156                 //set parsimony groups to left child
157                 map<string,int> parsimony = tree[lc].pGroups;
158                 
159                 int maxPars = 1;
160
161                 //look at right child groups and update maxPars if right child has something higher for that group.
162                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
163                         it2 = parsimony.find(it->first);
164                         if (it2 != parsimony.end()) {
165                                 parsimony[it->first]++;
166                         }else {
167                                 parsimony[it->first] = 1;
168                         }
169                         
170                         if(parsimony[it->first] > maxPars){
171                                 maxPars = parsimony[it->first];
172                         }
173                 }
174         
175                 // this is true if right child had a greater parsimony for a certain group
176                 if(maxPars > 1){
177                         //erase all the groups that are only 1 because you found something with 2.
178                         for(it=parsimony.begin();it!=parsimony.end();it++){
179                                 if(it->second == 1){
180                                         parsimony.erase(it->first);
181                                         it--;
182                                 }
183                         }
184                         //set one remaining groups to 1
185                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
186                         for(it=parsimony.begin();it!=parsimony.end();it++){
187                                 parsimony[it->first] = 1;
188                         }
189                 
190                 }
191         
192                 return parsimony;
193         }
194         catch(exception& e) {
195                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
196                 exit(1);
197         }
198         catch(...) {
199                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
200                 exit(1);
201         }               
202 }
203 /*****************************************************************/
204 //returns a map with a groupname and the number of times that group was seen in the children
205 //for instance if your children are white and black then it would return a map with 2 entries
206 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
207 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
208
209 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
210         try {
211         
212                 int lc = tree[i].getLChild();
213                 int rc = tree[i].getRChild();
214                 
215                 //loop through nodes groups removing the ones the user doesn't want
216                 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
217                         if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
218                 }
219                 
220                 //loop through nodes groups removing the ones the user doesn't want
221                 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
222                         if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
223                 }
224
225                 //set parsimony groups to left child
226                 map<string,int> parsimony = tree[lc].pGroups;
227                 
228                 int maxPars = 1;
229
230                 //look at right child groups and update maxPars if right child has something higher for that group.
231                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
232                         it2 = parsimony.find(it->first);
233                         if (it2 != parsimony.end()) {
234                                 parsimony[it->first]++;
235                         }else {
236                                 parsimony[it->first] = 1;
237                         }
238                         
239                         if(parsimony[it->first] > maxPars){
240                                 maxPars = parsimony[it->first];
241                         }
242                 }
243         
244                 // this is true if right child had a greater parsimony for a certain group
245                 if(maxPars > 1){
246                         //erase all the groups that are only 1 because you found something with 2.
247                         for(it=parsimony.begin();it!=parsimony.end();it++){
248                                 if(it->second == 1){
249                                         parsimony.erase(it->first);
250                                         it--;
251                                 }
252                         }
253                         //set one remaining groups to 1
254                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
255                         for(it=parsimony.begin();it!=parsimony.end();it++){
256                                 parsimony[it->first] = 1;
257                         }
258                 
259                 }
260         
261                 return parsimony;
262         }
263         catch(exception& e) {
264                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }
267         catch(...) {
268                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
269                 exit(1);
270         }               
271 }
272
273
274 /**************************************************************************************************/
275
276 map<string,int> Tree::mergeGcounts(int position) {
277         try{
278                 map<string,int>::iterator pos;
279         
280                 int lc = tree[position].getLChild();
281                 int rc = tree[position].getRChild();
282         
283                 map<string,int> sum = tree[lc].pcount;
284     
285                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
286                         sum[it->first] += it->second;
287                 }
288                 return sum;
289         }
290         catch(exception& e) {
291                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
292                 exit(1);
293         }
294         catch(...) {
295                 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
296                 exit(1);
297         }               
298 }
299 /**************************************************************************************************/
300
301 void Tree::randomLabels(vector<string> g) {
302         try {
303                 
304                 for(int i = 0; i < numLeaves; i++){
305                         int z;
306                         //get random index to switch with
307                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
308                         
309                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
310                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
311                         bool treez, treei;
312                 
313                         treez = inUsersGroups(tree[z].getGroup(), g);
314                         treei = inUsersGroups(tree[i].getGroup(), g);
315                         
316                         if ((treez == true) && (treei == true)) {
317                                 //switches node i and node z's info.
318                                 map<string,int> lib_hold = tree[z].pGroups;
319                                 tree[z].pGroups = (tree[i].pGroups);
320                                 tree[i].pGroups = (lib_hold);
321                                 
322                                 string zgroup = tree[z].getGroup();
323                                 tree[z].setGroup(tree[i].getGroup());
324                                 tree[i].setGroup(zgroup);
325                                 
326                                 string zname = tree[z].getName();
327                                 tree[z].setName(tree[i].getName());
328                                 tree[i].setName(zname);
329                                 
330                                 map<string,int> gcount_hold = tree[z].pcount;
331                                 tree[z].pcount = (tree[i].pcount);
332                                 tree[i].pcount = (gcount_hold);
333                         }
334                 }
335         }
336         catch(exception& e) {
337                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
338                 exit(1);
339         }
340         catch(...) {
341                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
342                 exit(1);
343         }               
344 }
345 /**************************************************************************************************/
346
347 void Tree::randomLabels(string groupA, string groupB) {
348         try {
349                 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
350                 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
351
352                 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
353                 for(int i=numSeqsA;i<randomGroups.size();i++){
354                         randomGroups[i] = groupB;
355                 }
356                 random_shuffle(randomGroups.begin(), randomGroups.end());
357                                 
358                 int randomCounter = 0;                          
359                 for(int i=0;i<numLeaves;i++){
360                         if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
361                                 tree[i].setGroup(randomGroups[randomCounter]);
362                                 tree[i].pcount.clear();
363                                 tree[i].pcount[randomGroups[randomCounter]] = 1;
364                                 tree[i].pGroups.clear();
365                                 tree[i].pGroups[randomGroups[randomCounter]] = 1;
366                                 randomCounter++;
367                         }
368                 }
369         }               
370         catch(exception& e) {
371                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
372                 exit(1);
373         }
374         catch(...) {
375                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
376                 exit(1);
377         }               
378 }
379 /**************************************************************************************************/
380 void Tree::randomBlengths()  {
381         try {
382                 for(int i=numNodes-1;i>=0;i--){
383                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
384                 
385                         float bl_hold = tree[z].getBranchLength();
386                         tree[z].setBranchLength(tree[i].getBranchLength());
387                         tree[i].setBranchLength(bl_hold);
388                 }
389         }
390         catch(exception& e) {
391                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
392                 exit(1);
393         }
394         catch(...) {
395                 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
396                 exit(1);
397         }               
398 }
399 /*************************************************************************************************/
400 void Tree::assembleRandomUnifracTree(vector<string> g) {
401         randomLabels(g);
402         assembleTree();
403 }
404 /*************************************************************************************************/
405 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
406         randomLabels(groupA, groupB);
407         assembleTree();
408 }
409
410 /*************************************************************************************************/
411 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
412 void Tree::assembleRandomTree() {
413         randomTopology();
414         assembleTree();
415 }
416 /**************************************************************************************************/
417
418 void Tree::randomTopology() {
419         try {
420                 for(int i=0;i<numNodes;i++){
421                         tree[i].setParent(-1);
422                 }
423                 for(int i=numLeaves;i<numNodes;i++){
424                         tree[i].setChildren(-1, -1); 
425                 }
426     
427                 for(int i=numLeaves;i<numNodes;i++){
428                         int escape =0;
429                         int rnd_index1, rnd_index2;
430                         while(escape == 0){
431                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
432                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
433                         }
434                 
435                         escape = 0;
436                         while(escape == 0){
437                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
438                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
439                                         escape = 1;
440                                 }               
441                         }
442                 
443                         tree[i].setChildren(rnd_index1,rnd_index2);
444                         tree[i].setParent(-1);
445                         tree[rnd_index1].setParent(i);
446                         tree[rnd_index2].setParent(i);
447                 }
448         }
449         catch(exception& e) {
450                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
451                 exit(1);
452         }
453         catch(...) {
454                 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
455                 exit(1);
456         }               
457 }
458
459 /*****************************************************************/
460 // This prints out the tree in Newick form.
461 void Tree::createNewickFile(string f) {
462         try {
463                 int root = findRoot();
464                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
465                 filename = f;
466                 openOutputFile(filename, out);
467                 
468                 printBranch(root);
469                 
470                 // you are at the end of the tree
471                 out << ";" << endl;
472                 out.close();
473         }
474         catch(exception& e) {
475                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
476                 exit(1);
477         }
478         catch(...) {
479                 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
480                 exit(1);
481         }               
482 }
483
484 /*****************************************************************/
485 //This function finds the index of the root node.
486
487 int Tree::findRoot() {
488         try {
489                 for (int i = 0; i < numNodes; i++) {
490                         //you found the root
491                         if (tree[i].getParent() == -1) { return i; }
492                 }
493                 return -1;
494         }
495         catch(exception& e) {
496                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
497                 exit(1);
498         }
499         catch(...) {
500                 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
501                 exit(1);
502         }               
503 }
504
505 /*****************************************************************/
506 void Tree::printBranch(int node) {
507         try {
508                 
509                 // you are not a leaf
510                 if (tree[node].getLChild() != -1) {
511                         out << "(";
512                         printBranch(tree[node].getLChild());
513                         out << ",";
514                         printBranch(tree[node].getRChild());
515                         out << ")";
516                 }else { //you are a leaf
517                         out << tree[node].getGroup() << ":" << tree[node].getBranchLength();
518                 }
519                 
520         }
521         catch(exception& e) {
522                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
523                 exit(1);
524         }
525         catch(...) {
526                 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
527                 exit(1);
528         }               
529 }
530
531 /*****************************************************************/
532
533 void Tree::printTree() {
534         
535         for(int i=0;i<numNodes;i++){
536                 cout << i << '\t';
537                 tree[i].printNode();
538         }
539         
540 }
541
542 /*****************************************************************/
543
544