]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
weightedcommand
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16         
17                 globaldata = GlobalData::getInstance();
18                 numLeaves = globaldata->gTreemap->getNumSeqs();
19                 numNodes = 2*numLeaves - 1;
20                 
21                 tree.resize(numNodes);
22
23                 //initialize tree with correct number of nodes, name and group info.
24                 for (int i = 0; i < numNodes; i++) {
25                         //initialize leaf nodes
26                         if (i <= (numLeaves-1)) {
27                                 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29                                 //the node knows its index
30                                 tree[i].setIndex(i);
31                                 //set pcount and pGroup for groupname to 1.
32                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
33                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
34                                 //Treemap knows name, group and index to speed up search
35                                 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
36         
37                         //intialize non leaf nodes
38                         }else if (i > (numLeaves-1)) {
39                                 tree[i].setName("");
40                                 tree[i].setGroup("");
41                                 //the node knows its index
42                                 tree[i].setIndex(i);
43                         }
44                 }
45         }
46         catch(exception& e) {
47                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }
50         catch(...) {
51                 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52                 exit(1);
53         }               
54 }
55
56 /*****************************************************************/
57
58 int Tree::getIndex(string searchName) {
59         try {
60                 //Treemap knows name, group and index to speed up search
61                 // getIndex function will return the vector index or -1 if seq is not found.
62                 int index = globaldata->gTreemap->getIndex(searchName);
63                 return index;
64                 
65         }
66         catch(exception& e) {
67                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
68                 exit(1);
69         }
70         catch(...) {
71                 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72                 exit(1);
73         }               
74 }
75 /*****************************************************************/
76
77 void Tree::setIndex(string searchName, int index) {
78         try {
79                 //set index in treemap
80                 globaldata->gTreemap->setIndex(searchName, index);
81         }
82         catch(exception& e) {
83                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
84                 exit(1);
85         }
86         catch(...) {
87                 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
88                 exit(1);
89         }               
90 }
91 /*****************************************************************/
92 void Tree::assembleTree() {
93         try {
94                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
95                 for (int i = numLeaves; i < numNodes; i++) {
96                         tree[i].pGroups = (mergeGroups(i));
97                         tree[i].pcount = (mergeGcounts(i));
98                 }
99         }
100         catch(exception& e) {
101                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
102                 exit(1);
103         }
104         catch(...) {
105                 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
106                 exit(1);
107         }               
108 }
109 /*****************************************************************/
110 void Tree::getCopy(Tree* copy) {
111         try {
112         
113                 //for each node in the tree copy its info
114                 for (int i = 0; i < numNodes; i++) {
115                         //copy name
116                         tree[i].setName(copy->tree[i].getName());
117                 
118                         //copy group
119                         tree[i].setGroup(copy->tree[i].getGroup());
120                         
121                         //copy branch length
122                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
123                 
124                         //copy parent
125                         tree[i].setParent(copy->tree[i].getParent());
126                 
127                         //copy children
128                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
129                 
130                         //copy index in node and tmap
131                         tree[i].setIndex(copy->tree[i].getIndex());
132                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
133                         
134                         //copy pGroups
135                         tree[i].pGroups = copy->tree[i].pGroups;
136                 
137                         //copy pcount
138                         tree[i].pcount = copy->tree[i].pcount;
139                 }
140         }
141         catch(exception& e) {
142                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
143                 exit(1);
144         }
145         catch(...) {
146                 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
147                 exit(1);
148         }               
149 }
150 /*****************************************************************/
151 //returns a map with a groupname and the number of times that group was seen in the children
152 //for instance if your children are white and black then it would return a map with 2 entries
153 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
154 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
155
156 map<string, int> Tree::mergeGroups(int i) {
157         try {
158                 int lc = tree[i].getLChild();
159                 int rc = tree[i].getRChild();
160                 
161                 //set parsimony groups to left child
162                 map<string,int> parsimony = tree[lc].pGroups;
163                 
164                 int maxPars = 1;
165
166                 //look at right child groups and update maxPars if right child has something higher for that group.
167                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
168                         it2 = parsimony.find(it->first);
169                         if (it2 != parsimony.end()) {
170                                 parsimony[it->first]++;
171                         }else {
172                                 parsimony[it->first] = 1;
173                         }
174                         
175                         if(parsimony[it->first] > maxPars){
176                                 maxPars = parsimony[it->first];
177                         }
178                 }
179         
180                 // this is true if right child had a greater parsimony for a certain group
181                 if(maxPars > 1){
182                         //erase all the groups that are only 1 because you found something with 2.
183                         for(it=parsimony.begin();it!=parsimony.end();it++){
184                                 if(it->second == 1){
185                                         parsimony.erase(it->first);
186                                         it--;
187                                 }
188                         }
189                         //set one remaining groups to 1
190                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
191                         for(it=parsimony.begin();it!=parsimony.end();it++){
192                                 parsimony[it->first] = 1;
193                         }
194                 
195                 }
196         
197                 return parsimony;
198         }
199         catch(exception& e) {
200                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
201                 exit(1);
202         }
203         catch(...) {
204                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
205                 exit(1);
206         }               
207 }
208 /*****************************************************************/
209 //returns a map with a groupname and the number of times that group was seen in the children
210 //for instance if your children are white and black then it would return a map with 2 entries
211 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
212 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
213
214 map<string, int> Tree::mergeUserGroups(int i) {
215         try {
216         
217                 int lc = tree[i].getLChild();
218                 int rc = tree[i].getRChild();
219                 
220                 //loop through nodes groups removing the ones the user doesn't want
221                 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
222                         if (inUsersGroups(it->first, globaldata->Groups) != true) { tree[lc].pGroups.erase(it->first); }
223                 }
224                 
225                 //loop through nodes groups removing the ones the user doesn't want
226                 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
227                         if (inUsersGroups(it->first, globaldata->Groups) != true) { tree[rc].pGroups.erase(it->first); }
228                 }
229
230                 //set parsimony groups to left child
231                 map<string,int> parsimony = tree[lc].pGroups;
232                 
233                 int maxPars = 1;
234
235                 //look at right child groups and update maxPars if right child has something higher for that group.
236                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
237                         it2 = parsimony.find(it->first);
238                         if (it2 != parsimony.end()) {
239                                 parsimony[it->first]++;
240                         }else {
241                                 parsimony[it->first] = 1;
242                         }
243                         
244                         if(parsimony[it->first] > maxPars){
245                                 maxPars = parsimony[it->first];
246                         }
247                 }
248         
249                 // this is true if right child had a greater parsimony for a certain group
250                 if(maxPars > 1){
251                         //erase all the groups that are only 1 because you found something with 2.
252                         for(it=parsimony.begin();it!=parsimony.end();it++){
253                                 if(it->second == 1){
254                                         parsimony.erase(it->first);
255                                         it--;
256                                 }
257                         }
258                         //set one remaining groups to 1
259                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
260                         for(it=parsimony.begin();it!=parsimony.end();it++){
261                                 parsimony[it->first] = 1;
262                         }
263                 
264                 }
265         
266                 return parsimony;
267         }
268         catch(exception& e) {
269                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
270                 exit(1);
271         }
272         catch(...) {
273                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
274                 exit(1);
275         }               
276 }
277
278
279 /**************************************************************************************************/
280
281 map<string,int> Tree::mergeGcounts(int position) {
282         try{
283                 map<string,int>::iterator pos;
284         
285                 int lc = tree[position].getLChild();
286                 int rc = tree[position].getRChild();
287         
288                 map<string,int> sum = tree[lc].pcount;
289     
290                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
291                         sum[it->first] += it->second;
292                 }
293                 return sum;
294         }
295         catch(exception& e) {
296                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297                 exit(1);
298         }
299         catch(...) {
300                 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
301                 exit(1);
302         }               
303 }
304 /**************************************************************************************************/
305
306 void Tree::randomLabels() {
307         try {
308                 
309                 //set up the groups the user wants to include
310                 setGroups();
311                 
312                 for(int i = 0; i < numLeaves; i++){
313                         int z;
314                         //get random index to switch with
315                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
316                         
317                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
318                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
319                         bool treez, treei;
320                 
321                         treez = inUsersGroups(tree[z].getGroup(), globaldata->Groups);
322                         treei = inUsersGroups(tree[i].getGroup(), globaldata->Groups);
323                         
324                         if ((treez == true) && (treei == true)) {
325                                 //switches node i and node z's info.
326                                 map<string,int> lib_hold = tree[z].pGroups;
327                                 tree[z].pGroups = (tree[i].pGroups);
328                                 tree[i].pGroups = (lib_hold);
329                                 
330                                 string zgroup = tree[z].getGroup();
331                                 tree[z].setGroup(tree[i].getGroup());
332                                 tree[i].setGroup(zgroup);
333                                 
334                                 string zname = tree[z].getName();
335                                 tree[z].setName(tree[i].getName());
336                                 tree[i].setName(zname);
337                                 
338                                 map<string,int> gcount_hold = tree[z].pcount;
339                                 tree[z].pcount = (tree[i].pcount);
340                                 tree[i].pcount = (gcount_hold);
341                         }
342                 }
343         }
344         catch(exception& e) {
345                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
346                 exit(1);
347         }
348         catch(...) {
349                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
350                 exit(1);
351         }               
352 }
353 /**************************************************************************************************/
354
355 void Tree::randomLabels(string groupA, string groupB) {
356         try {
357                 for(int i = 0; i < numLeaves; i++) {
358                         int z;
359                         //get random index to switch with
360                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
361                         
362                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
363                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
364                         if (((tree[z].getGroup() == groupA) || (tree[z].getGroup() == groupB)) && ((tree[i].getGroup() == groupA) || (tree[i].getGroup() == groupB))) {
365                                 //switches node i and node z's info.
366                                 map<string,int> lib_hold = tree[z].pGroups;
367                                 tree[z].pGroups = (tree[i].pGroups);
368                                 tree[i].pGroups = (lib_hold);
369                                 
370                                 string zgroup = tree[z].getGroup();
371                                 tree[z].setGroup(tree[i].getGroup());
372                                 tree[i].setGroup(zgroup);
373                                 
374                                 string zname = tree[z].getName();
375                                 tree[z].setName(tree[i].getName());
376                                 tree[i].setName(zname);
377                                 
378                                 map<string,int> gcount_hold = tree[z].pcount;
379                                 tree[z].pcount = (tree[i].pcount);
380                                 tree[i].pcount = (gcount_hold);
381                         }
382                 }
383         }
384         catch(exception& e) {
385                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
386                 exit(1);
387         }
388         catch(...) {
389                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
390                 exit(1);
391         }               
392 }
393 /**************************************************************************************************/
394 void Tree::randomBlengths()  {
395         try {
396                 for(int i=numNodes-1;i>=0;i--){
397                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
398                 
399                         float bl_hold = tree[z].getBranchLength();
400                         tree[z].setBranchLength(tree[i].getBranchLength());
401                         tree[i].setBranchLength(bl_hold);
402                 }
403         }
404         catch(exception& e) {
405                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
406                 exit(1);
407         }
408         catch(...) {
409                 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
410                 exit(1);
411         }               
412 }
413 /*************************************************************************************************/
414 void Tree::assembleRandomUnifracTree() {
415         randomLabels();
416         assembleTree();
417 }
418 /*************************************************************************************************/
419 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
420         randomLabels(groupA, groupB);
421         assembleTree();
422 }
423
424 /*************************************************************************************************/
425 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
426 void Tree::assembleRandomTree() {
427         randomTopology();
428         assembleTree();
429 }
430 /**************************************************************************************************/
431
432 void Tree::randomTopology() {
433         try {
434                 for(int i=0;i<numNodes;i++){
435                         tree[i].setParent(-1);
436                 }
437                 for(int i=numLeaves;i<numNodes;i++){
438                         tree[i].setChildren(-1, -1); 
439                 }
440     
441                 for(int i=numLeaves;i<numNodes;i++){
442                         int escape =0;
443                         int rnd_index1, rnd_index2;
444                         while(escape == 0){
445                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
446                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
447                         }
448                 
449                         escape = 0;
450                         while(escape == 0){
451                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
452                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
453                                         escape = 1;
454                                 }               
455                         }
456                 
457                         tree[i].setChildren(rnd_index1,rnd_index2);
458                         tree[i].setParent(-1);
459                         tree[rnd_index1].setParent(i);
460                         tree[rnd_index2].setParent(i);
461                 }
462         }
463         catch(exception& e) {
464                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
465                 exit(1);
466         }
467         catch(...) {
468                 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
469                 exit(1);
470         }               
471 }
472
473 /*****************************************************************/
474 // This prints out the tree in Newick form.
475 void Tree::createNewickFile(string f) {
476         try {
477                 int root = findRoot();
478                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
479                 filename = f;
480                 openOutputFile(filename, out);
481                 
482                 printBranch(root);
483                 
484                 // you are at the end of the tree
485                 out << ";" << endl;
486                 out.close();
487         }
488         catch(exception& e) {
489                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
490                 exit(1);
491         }
492         catch(...) {
493                 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
494                 exit(1);
495         }               
496 }
497
498 /*****************************************************************/
499 //This function finds the index of the root node.
500
501 int Tree::findRoot() {
502         try {
503                 for (int i = 0; i < numNodes; i++) {
504                         //you found the root
505                         if (tree[i].getParent() == -1) { return i; }
506                 }
507                 return -1;
508         }
509         catch(exception& e) {
510                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
511                 exit(1);
512         }
513         catch(...) {
514                 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
515                 exit(1);
516         }               
517 }
518
519 /*****************************************************************/
520 void Tree::printBranch(int node) {
521         try {
522                 
523                 // you are not a leaf
524                 if (tree[node].getLChild() != -1) {
525                         out << "(";
526                         printBranch(tree[node].getLChild());
527                         out << ",";
528                         printBranch(tree[node].getRChild());
529                         out << ")";
530                 }else { //you are a leaf
531                         out << tree[node].getName() << ":" << tree[node].getBranchLength();
532                 }
533                 
534         }
535         catch(exception& e) {
536                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
537                 exit(1);
538         }
539         catch(...) {
540                 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
541                 exit(1);
542         }               
543 }
544
545 /*****************************************************************/
546
547 void Tree::setGroups() {
548         try {
549                 //if the user has not entered specific groups to analyze then do them all
550                 if (globaldata->Groups.size() != 0) {
551                         //check that groups are valid
552                         for (int i = 0; i < globaldata->Groups.size(); i++) {
553                                 if (globaldata->gTreemap->isValidGroup(globaldata->Groups[i]) != true) {
554                                         cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
555                                         // erase the invalid group from globaldata->Groups
556                                         globaldata->Groups.erase (globaldata->Groups.begin()+i);
557                                 }
558                         }
559                         
560                         //if the user only entered invalid groups
561                         if (globaldata->Groups.size() == 0) { 
562                                 cout << "When using the groups parameter you must have at least 1 valid group. I will run the command using all the groups in your groupfile." << endl; 
563                                 for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
564                                         globaldata->Groups.push_back(globaldata->gTreemap->namesOfGroups[i]);
565                                 }
566                         }
567                                         
568                 }else {
569                         for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
570                                 globaldata->Groups.push_back(globaldata->gTreemap->namesOfGroups[i]);
571                         }
572                 }
573         }
574         catch(exception& e) {
575                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
576                 exit(1);
577         }
578         catch(...) {
579                 cout << "An unknown error has occurred in the Tree class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
580                 exit(1);
581         }               
582
583 }
584
585 /*****************************************************************/
586
587 void Tree::printTree() {
588         
589         for(int i=0;i<numNodes;i++){
590                 cout << i << '\t';
591                 tree[i].printNode();
592         }
593         
594 }
595
596 /*****************************************************************/
597
598