]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
1e115bddad1a451c181c4c26b90b758788df1155
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16         
17                 globaldata = GlobalData::getInstance();
18                 numLeaves = globaldata->gTreemap->getNumSeqs();
19                 numNodes = 2*numLeaves - 1;
20                 
21                 tree.resize(numNodes);
22
23                 //initialize tree with correct number of nodes, name and group info.
24                 for (int i = 0; i < numNodes; i++) {
25                         //initialize leaf nodes
26                         if (i <= (numLeaves-1)) {
27                                 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29                                 //set pcount and pGroup for groupname to 1.
30                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
31                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
32                                 //Treemap knows name, group and index to speed up search
33                                 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
34         
35                         //intialize non leaf nodes
36                         }else if (i > (numLeaves-1)) {
37                                 tree[i].setName("");
38                                 tree[i].setGroup("");
39                         }
40                 }
41         }
42         catch(exception& e) {
43                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
44                 exit(1);
45         }
46         catch(...) {
47                 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }               
50 }
51
52 /*****************************************************************/
53 int Tree::getIndex(string searchName) {
54         try {
55                 //Treemap knows name, group and index to speed up search
56                 // getIndex function will return the vector index or -1 if seq is not found.
57                 int index = globaldata->gTreemap->getIndex(searchName);
58                 return index;
59                 
60         }
61         catch(exception& e) {
62                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
63                 exit(1);
64         }
65         catch(...) {
66                 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
67                 exit(1);
68         }               
69 }
70 /*****************************************************************/
71
72 void Tree::setIndex(string searchName, int index) {
73         try {
74                 //set index in treemap
75                 globaldata->gTreemap->setIndex(searchName, index);
76         }
77         catch(exception& e) {
78                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
79                 exit(1);
80         }
81         catch(...) {
82                 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
83                 exit(1);
84         }               
85 }
86 /*****************************************************************/
87 void Tree::assembleTree() {
88         try {
89                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
90                 for (int i = numLeaves; i < numNodes; i++) {
91                         tree[i].pGroups = (mergeGroups(i));
92                         tree[i].pcount = (mergeGcounts(i));
93                 }
94         }
95         catch(exception& e) {
96                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
97                 exit(1);
98         }
99         catch(...) {
100                 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
101                 exit(1);
102         }               
103 }
104 /*****************************************************************/
105 void Tree::getCopy(Tree* copy) {
106         try {
107         
108                 //for each node in the tree copy its info
109                 for (int i = 0; i < numNodes; i++) {
110                         //copy name
111                         tree[i].setName(copy->tree[i].getName());
112                 
113                         //copy group
114                         tree[i].setGroup(copy->tree[i].getGroup());
115                         
116                         //copy branch length
117                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
118                 
119                         //copy parent
120                         tree[i].setParent(copy->tree[i].getParent());
121                 
122                         //copy children
123                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
124                 
125                         //copy index in node and tmap
126                         tree[i].setIndex(copy->tree[i].getIndex());
127                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
128                         
129                         //copy pGroups
130                         tree[i].pGroups = copy->tree[i].pGroups;
131                 
132                         //copy pcount
133                         tree[i].pcount = copy->tree[i].pcount;
134                 }
135         }
136         catch(exception& e) {
137                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }
140         catch(...) {
141                 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
142                 exit(1);
143         }               
144 }
145 /*****************************************************************/
146 //returns a map with a groupname and the number of times that group was seen in the children
147 //for instance if your children are white and black then it would return a map with 2 entries
148 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
149 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
150
151 map<string, int> Tree::mergeGroups(int i) {
152         try {
153                 int lc = tree[i].getLChild();
154                 int rc = tree[i].getRChild();
155
156                 //set parsimony groups to left child
157                 map<string,int> parsimony = tree[lc].pGroups;
158                 
159                 int maxPars = 1;
160
161                 //look at right child groups and update maxPars if right child has something higher for that group.
162                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
163                         it2 = parsimony.find(it->first);
164                         if (it2 != parsimony.end()) {
165                                 parsimony[it->first]++;
166                         }else {
167                                 parsimony[it->first] = 1;
168                         }
169                         
170                         if(parsimony[it->first] > maxPars){
171                                 maxPars = parsimony[it->first];
172                         }
173                 }
174         
175                 // this is true if right child had a greater parsimony for a certain group
176                 if(maxPars > 1){
177                         //erase all the groups that are only 1 because you found something with 2.
178                         for(it=parsimony.begin();it!=parsimony.end();it++){
179                                 if(it->second == 1){
180                                         parsimony.erase(it->first);
181 //                                      it--;
182                                 }
183                         }
184                         //set one remaining groups to 1
185                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
186                         for(it=parsimony.begin();it!=parsimony.end();it++){
187                                 parsimony[it->first] = 1;
188                         }
189                 
190                 }
191         
192                 return parsimony;
193         }
194         catch(exception& e) {
195                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
196                 exit(1);
197         }
198         catch(...) {
199                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
200                 exit(1);
201         }               
202 }
203 /*****************************************************************/
204 //returns a map with a groupname and the number of times that group was seen in the children
205 //for instance if your children are white and black then it would return a map with 2 entries
206 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
207 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
208
209 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
210         try {
211         
212                 int lc = tree[i].getLChild();
213                 int rc = tree[i].getRChild();
214                 
215                 //loop through nodes groups removing the ones the user doesn't want
216                 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
217                         if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
218                 }
219                 
220                 //loop through nodes groups removing the ones the user doesn't want
221                 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
222                         if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
223                 }
224
225                 //set parsimony groups to left child
226                 map<string,int> parsimony = tree[lc].pGroups;
227                 
228                 int maxPars = 1;
229
230                 //look at right child groups and update maxPars if right child has something higher for that group.
231                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
232                         it2 = parsimony.find(it->first);
233                         if (it2 != parsimony.end()) {
234                                 parsimony[it->first]++;
235                         }else {
236                                 parsimony[it->first] = 1;
237                         }
238                         
239                         if(parsimony[it->first] > maxPars){
240                                 maxPars = parsimony[it->first];
241                         }
242                 }
243                         
244                 // this is true if right child had a greater parsimony for a certain group
245                 if(maxPars > 1){
246                         //erase all the groups that are only 1 because you found something with 2.
247                         for(it=parsimony.begin();it!=parsimony.end();it++){
248                                 if(it->second == 1){
249                                         parsimony.erase(it->first);
250                                 }
251                         }
252                         for(it=parsimony.begin();it!=parsimony.end();it++){
253                                 parsimony[it->first] = 1;
254                         }
255                 }               
256                 
257                 return parsimony;
258         }
259         catch(exception& e) {
260                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
261                 exit(1);
262         }
263         catch(...) {
264                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }               
267 }
268
269
270 /**************************************************************************************************/
271
272 map<string,int> Tree::mergeGcounts(int position) {
273         try{
274                 map<string,int>::iterator pos;
275         
276                 int lc = tree[position].getLChild();
277                 int rc = tree[position].getRChild();
278         
279                 map<string,int> sum = tree[lc].pcount;
280     
281                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
282                         sum[it->first] += it->second;
283                 }
284                 return sum;
285         }
286         catch(exception& e) {
287                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
288                 exit(1);
289         }
290         catch(...) {
291                 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
292                 exit(1);
293         }               
294 }
295 /**************************************************************************************************/
296
297 void Tree::randomLabels(vector<string> g) {
298         try {
299                 
300                 for(int i = 0; i < numLeaves; i++){
301                         int z;
302                         //get random index to switch with
303                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
304                         
305                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
306                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
307                         bool treez, treei;
308                 
309                         treez = inUsersGroups(tree[z].getGroup(), g);
310                         treei = inUsersGroups(tree[i].getGroup(), g);
311                         
312                         if ((treez == true) && (treei == true)) {
313                                 //switches node i and node z's info.
314                                 map<string,int> lib_hold = tree[z].pGroups;
315                                 tree[z].pGroups = (tree[i].pGroups);
316                                 tree[i].pGroups = (lib_hold);
317                                 
318                                 string zgroup = tree[z].getGroup();
319                                 tree[z].setGroup(tree[i].getGroup());
320                                 tree[i].setGroup(zgroup);
321                                 
322                                 string zname = tree[z].getName();
323                                 tree[z].setName(tree[i].getName());
324                                 tree[i].setName(zname);
325                                 
326                                 map<string,int> gcount_hold = tree[z].pcount;
327                                 tree[z].pcount = (tree[i].pcount);
328                                 tree[i].pcount = (gcount_hold);
329                         }
330                 }
331         }
332         catch(exception& e) {
333                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
334                 exit(1);
335         }
336         catch(...) {
337                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
338                 exit(1);
339         }               
340 }
341 /**************************************************************************************************/
342
343 void Tree::randomLabels(string groupA, string groupB) {
344         try {
345                 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
346                 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
347
348                 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
349                 for(int i=numSeqsA;i<randomGroups.size();i++){
350                         randomGroups[i] = groupB;
351                 }
352                 random_shuffle(randomGroups.begin(), randomGroups.end());
353                                 
354                 int randomCounter = 0;                          
355                 for(int i=0;i<numLeaves;i++){
356                         if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
357                                 tree[i].setGroup(randomGroups[randomCounter]);
358                                 tree[i].pcount.clear();
359                                 tree[i].pcount[randomGroups[randomCounter]] = 1;
360                                 tree[i].pGroups.clear();
361                                 tree[i].pGroups[randomGroups[randomCounter]] = 1;
362                                 randomCounter++;
363                         }
364                 }
365         }               
366         catch(exception& e) {
367                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
368                 exit(1);
369         }
370         catch(...) {
371                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
372                 exit(1);
373         }               
374 }
375 /**************************************************************************************************/
376 void Tree::randomBlengths()  {
377         try {
378                 for(int i=numNodes-1;i>=0;i--){
379                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
380                 
381                         float bl_hold = tree[z].getBranchLength();
382                         tree[z].setBranchLength(tree[i].getBranchLength());
383                         tree[i].setBranchLength(bl_hold);
384                 }
385         }
386         catch(exception& e) {
387                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
388                 exit(1);
389         }
390         catch(...) {
391                 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
392                 exit(1);
393         }               
394 }
395 /*************************************************************************************************/
396 void Tree::assembleRandomUnifracTree(vector<string> g) {
397         randomLabels(g);
398         assembleTree();
399 }
400 /*************************************************************************************************/
401 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
402         randomLabels(groupA, groupB);
403         assembleTree();
404 }
405
406 /*************************************************************************************************/
407 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
408 void Tree::assembleRandomTree() {
409         randomTopology();
410         assembleTree();
411 }
412 /**************************************************************************************************/
413
414 void Tree::randomTopology() {
415         try {
416                 for(int i=0;i<numNodes;i++){
417                         tree[i].setParent(-1);
418                 }
419                 for(int i=numLeaves;i<numNodes;i++){
420                         tree[i].setChildren(-1, -1); 
421                 }
422     
423                 for(int i=numLeaves;i<numNodes;i++){
424                         int escape =0;
425                         int rnd_index1, rnd_index2;
426                         while(escape == 0){
427                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
428                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
429                         }
430                 
431                         escape = 0;
432                         while(escape == 0){
433                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
434                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
435                                         escape = 1;
436                                 }               
437                         }
438                 
439                         tree[i].setChildren(rnd_index1,rnd_index2);
440                         tree[i].setParent(-1);
441                         tree[rnd_index1].setParent(i);
442                         tree[rnd_index2].setParent(i);
443                 }
444         }
445         catch(exception& e) {
446                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
447                 exit(1);
448         }
449         catch(...) {
450                 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
451                 exit(1);
452         }               
453 }
454
455 /*****************************************************************/
456 // This prints out the tree in Newick form.
457 void Tree::createNewickFile(string f) {
458         try {
459                 int root = findRoot();
460                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
461                 filename = f;
462                 openOutputFile(filename, out);
463                 
464                 printBranch(root);
465                 
466                 // you are at the end of the tree
467                 out << ";" << endl;
468                 out.close();
469         }
470         catch(exception& e) {
471                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
472                 exit(1);
473         }
474         catch(...) {
475                 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
476                 exit(1);
477         }               
478 }
479
480 /*****************************************************************/
481 //This function finds the index of the root node.
482
483 int Tree::findRoot() {
484         try {
485                 for (int i = 0; i < numNodes; i++) {
486                         //you found the root
487                         if (tree[i].getParent() == -1) { return i; }
488                 }
489                 return -1;
490         }
491         catch(exception& e) {
492                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
493                 exit(1);
494         }
495         catch(...) {
496                 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
497                 exit(1);
498         }               
499 }
500
501 /*****************************************************************/
502 void Tree::printBranch(int node) {
503         try {
504                 
505                 // you are not a leaf
506                 if (tree[node].getLChild() != -1) {
507                         out << "(";
508                         printBranch(tree[node].getLChild());
509                         out << ",";
510                         printBranch(tree[node].getRChild());
511                         out << ")";
512                 }else { //you are a leaf
513                         out << tree[node].getGroup() << ":" << tree[node].getBranchLength();
514                 }
515                 
516         }
517         catch(exception& e) {
518                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
519                 exit(1);
520         }
521         catch(...) {
522                 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
523                 exit(1);
524         }               
525 }
526
527 /*****************************************************************/
528
529 void Tree::printTree() {
530         
531         for(int i=0;i<numNodes;i++){
532                 cout << i << '\t';
533                 tree[i].printNode();
534         }
535         
536 }
537
538 /*****************************************************************/
539
540