]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
working on readtree
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16         
17                 globaldata = GlobalData::getInstance();
18                 numLeaves = globaldata->gTreemap->getNumSeqs();
19                 numNodes = 2*numLeaves - 1;
20                 
21                 tree.resize(numNodes);
22
23                 //initialize tree with correct number of nodes, name and group info.
24                 for (int i = 0; i < numNodes; i++) {
25                         //initialize leaf nodes
26                         if (i <= (numLeaves-1)) {
27                                 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29                                 //set pcount and pGroup for groupname to 1.
30                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
31                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
32                                 //Treemap knows name, group and index to speed up search
33                                 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
34         
35                         //intialize non leaf nodes
36                         }else if (i > (numLeaves-1)) {
37                                 tree[i].setName("");
38                                 tree[i].setGroup("");
39                         }
40                 }
41         }
42         catch(exception& e) {
43                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
44                 exit(1);
45         }
46         catch(...) {
47                 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }               
50 }
51
52 /*****************************************************************/
53 void Tree::resetTree(){
54 try {
55                 numLeaves = globaldata->gTreemap->getNumSeqs();
56                 numNodes = 2*numLeaves - 1;
57                 
58                 tree.resize(numNodes);
59
60                 //initialize tree with correct number of nodes, name and group info.
61                 for (int i = 0; i < numNodes; i++) {
62                         //initialize leaf nodes
63                         if (i <= (numLeaves-1)) {
64                                 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
65                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
66                                 //set pcount and pGroup for groupname to 1.
67                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
68                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
69                                 //Treemap knows name, group and index to speed up search
70                                 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
71         
72                         //intialize non leaf nodes
73                         }else if (i > (numLeaves-1)) {
74                                 tree[i].setName("");
75                                 tree[i].setGroup("");
76                         }
77                 }
78         }
79         catch(exception& e) {
80                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function resetTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
81                 exit(1);
82         }
83         catch(...) {
84                 cout << "An unknown error has occurred in the Tree class function resetTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
85                 exit(1);
86         }               
87
88 }
89 /*****************************************************************/
90 int Tree::getIndex(string searchName) {
91         try {
92                 //Treemap knows name, group and index to speed up search
93                 // getIndex function will return the vector index or -1 if seq is not found.
94                 int index = globaldata->gTreemap->getIndex(searchName);
95                 return index;
96                 
97         }
98         catch(exception& e) {
99                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
100                 exit(1);
101         }
102         catch(...) {
103                 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
104                 exit(1);
105         }               
106 }
107 /*****************************************************************/
108
109 void Tree::setIndex(string searchName, int index) {
110         try {
111                 //set index in treemap
112                 globaldata->gTreemap->setIndex(searchName, index);
113         }
114         catch(exception& e) {
115                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
116                 exit(1);
117         }
118         catch(...) {
119                 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
120                 exit(1);
121         }               
122 }
123 /*****************************************************************/
124 void Tree::assembleTree() {
125         try {
126                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
127                 for (int i = numLeaves; i < numNodes; i++) {
128                         tree[i].pGroups = (mergeGroups(i));
129                         tree[i].pcount = (mergeGcounts(i));
130                 }
131         }
132         catch(exception& e) {
133                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
134                 exit(1);
135         }
136         catch(...) {
137                 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
138                 exit(1);
139         }               
140 }
141 /*****************************************************************/
142 void Tree::getCopy(Tree* copy) {
143         try {
144         
145                 //for each node in the tree copy its info
146                 for (int i = 0; i < numNodes; i++) {
147                         //copy name
148                         tree[i].setName(copy->tree[i].getName());
149                 
150                         //copy group
151                         tree[i].setGroup(copy->tree[i].getGroup());
152                         
153                         //copy branch length
154                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
155                 
156                         //copy parent
157                         tree[i].setParent(copy->tree[i].getParent());
158                 
159                         //copy children
160                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
161                 
162                         //copy index in node and tmap
163                         tree[i].setIndex(copy->tree[i].getIndex());
164                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
165                         
166                         //copy pGroups
167                         tree[i].pGroups = copy->tree[i].pGroups;
168                 
169                         //copy pcount
170                         tree[i].pcount = copy->tree[i].pcount;
171                 }
172         }
173         catch(exception& e) {
174                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
175                 exit(1);
176         }
177         catch(...) {
178                 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
179                 exit(1);
180         }               
181 }
182 /*****************************************************************/
183 //returns a map with a groupname and the number of times that group was seen in the children
184 //for instance if your children are white and black then it would return a map with 2 entries
185 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
186 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
187
188 map<string, int> Tree::mergeGroups(int i) {
189         try {
190                 int lc = tree[i].getLChild();
191                 int rc = tree[i].getRChild();
192 cout << i << lc << rc << endl;
193                 //set parsimony groups to left child
194                 map<string,int> parsimony = tree[lc].pGroups;
195                 
196                 int maxPars = 1;
197
198                 //look at right child groups and update maxPars if right child has something higher for that group.
199                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
200                         it2 = parsimony.find(it->first);
201                         if (it2 != parsimony.end()) {
202                                 parsimony[it->first]++;
203                         }else {
204                                 parsimony[it->first] = 1;
205                         }
206                         
207                         if(parsimony[it->first] > maxPars){
208                                 maxPars = parsimony[it->first];
209                         }
210                 }
211         
212                 // this is true if right child had a greater parsimony for a certain group
213                 if(maxPars > 1){
214                         //erase all the groups that are only 1 because you found something with 2.
215                         for(it=parsimony.begin();it!=parsimony.end();it++){
216                                 if(it->second == 1){
217                                         parsimony.erase(it->first);
218                                         it--;
219                                 }
220                         }
221                         //set one remaining groups to 1
222                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
223                         for(it=parsimony.begin();it!=parsimony.end();it++){
224                                 parsimony[it->first] = 1;
225                         }
226                 
227                 }
228         
229                 return parsimony;
230         }
231         catch(exception& e) {
232                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
233                 exit(1);
234         }
235         catch(...) {
236                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
237                 exit(1);
238         }               
239 }
240 /*****************************************************************/
241 //returns a map with a groupname and the number of times that group was seen in the children
242 //for instance if your children are white and black then it would return a map with 2 entries
243 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
244 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
245
246 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
247         try {
248         
249                 int lc = tree[i].getLChild();
250                 int rc = tree[i].getRChild();
251                 
252                 //loop through nodes groups removing the ones the user doesn't want
253                 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
254                         if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
255                 }
256                 
257                 //loop through nodes groups removing the ones the user doesn't want
258                 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
259                         if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
260                 }
261
262                 //set parsimony groups to left child
263                 map<string,int> parsimony = tree[lc].pGroups;
264                 
265                 int maxPars = 1;
266
267                 //look at right child groups and update maxPars if right child has something higher for that group.
268                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
269                         it2 = parsimony.find(it->first);
270                         if (it2 != parsimony.end()) {
271                                 parsimony[it->first]++;
272                         }else {
273                                 parsimony[it->first] = 1;
274                         }
275                         
276                         if(parsimony[it->first] > maxPars){
277                                 maxPars = parsimony[it->first];
278                         }
279                 }
280         
281                 // this is true if right child had a greater parsimony for a certain group
282                 if(maxPars > 1){
283                         //erase all the groups that are only 1 because you found something with 2.
284                         for(it=parsimony.begin();it!=parsimony.end();it++){
285                                 if(it->second == 1){
286                                         parsimony.erase(it->first);
287                                         it--;
288                                 }
289                         }
290                         //set one remaining groups to 1
291                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
292                         for(it=parsimony.begin();it!=parsimony.end();it++){
293                                 parsimony[it->first] = 1;
294                         }
295                 
296                 }
297         
298                 return parsimony;
299         }
300         catch(exception& e) {
301                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
302                 exit(1);
303         }
304         catch(...) {
305                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
306                 exit(1);
307         }               
308 }
309
310
311 /**************************************************************************************************/
312
313 map<string,int> Tree::mergeGcounts(int position) {
314         try{
315                 map<string,int>::iterator pos;
316         
317                 int lc = tree[position].getLChild();
318                 int rc = tree[position].getRChild();
319         
320                 map<string,int> sum = tree[lc].pcount;
321     
322                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
323                         sum[it->first] += it->second;
324                 }
325                 return sum;
326         }
327         catch(exception& e) {
328                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
329                 exit(1);
330         }
331         catch(...) {
332                 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
333                 exit(1);
334         }               
335 }
336 /**************************************************************************************************/
337
338 void Tree::randomLabels(vector<string> g) {
339         try {
340                 
341                 for(int i = 0; i < numLeaves; i++){
342                         int z;
343                         //get random index to switch with
344                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
345                         
346                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
347                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
348                         bool treez, treei;
349                 
350                         treez = inUsersGroups(tree[z].getGroup(), g);
351                         treei = inUsersGroups(tree[i].getGroup(), g);
352                         
353                         if ((treez == true) && (treei == true)) {
354                                 //switches node i and node z's info.
355                                 map<string,int> lib_hold = tree[z].pGroups;
356                                 tree[z].pGroups = (tree[i].pGroups);
357                                 tree[i].pGroups = (lib_hold);
358                                 
359                                 string zgroup = tree[z].getGroup();
360                                 tree[z].setGroup(tree[i].getGroup());
361                                 tree[i].setGroup(zgroup);
362                                 
363                                 string zname = tree[z].getName();
364                                 tree[z].setName(tree[i].getName());
365                                 tree[i].setName(zname);
366                                 
367                                 map<string,int> gcount_hold = tree[z].pcount;
368                                 tree[z].pcount = (tree[i].pcount);
369                                 tree[i].pcount = (gcount_hold);
370                         }
371                 }
372         }
373         catch(exception& e) {
374                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
375                 exit(1);
376         }
377         catch(...) {
378                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
379                 exit(1);
380         }               
381 }
382 /**************************************************************************************************/
383
384 void Tree::randomLabels(string groupA, string groupB) {
385         try {
386                 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
387                 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
388
389                 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
390                 for(int i=numSeqsA;i<randomGroups.size();i++){
391                         randomGroups[i] = groupB;
392                 }
393                 random_shuffle(randomGroups.begin(), randomGroups.end());
394                                 
395                 int randomCounter = 0;                          
396                 for(int i=0;i<numLeaves;i++){
397                         if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
398                                 tree[i].setGroup(randomGroups[randomCounter]);
399                                 tree[i].pcount.clear();
400                                 tree[i].pcount[randomGroups[randomCounter]] = 1;
401                                 tree[i].pGroups.clear();
402                                 tree[i].pGroups[randomGroups[randomCounter]] = 1;
403                                 randomCounter++;
404                         }
405                 }
406         }               
407         catch(exception& e) {
408                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
409                 exit(1);
410         }
411         catch(...) {
412                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
413                 exit(1);
414         }               
415 }
416 /**************************************************************************************************/
417 void Tree::randomBlengths()  {
418         try {
419                 for(int i=numNodes-1;i>=0;i--){
420                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
421                 
422                         float bl_hold = tree[z].getBranchLength();
423                         tree[z].setBranchLength(tree[i].getBranchLength());
424                         tree[i].setBranchLength(bl_hold);
425                 }
426         }
427         catch(exception& e) {
428                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
429                 exit(1);
430         }
431         catch(...) {
432                 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
433                 exit(1);
434         }               
435 }
436 /*************************************************************************************************/
437 void Tree::assembleRandomUnifracTree(vector<string> g) {
438         randomLabels(g);
439         assembleTree();
440 }
441 /*************************************************************************************************/
442 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
443         randomLabels(groupA, groupB);
444         assembleTree();
445 }
446
447 /*************************************************************************************************/
448 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
449 void Tree::assembleRandomTree() {
450         randomTopology();
451         assembleTree();
452 }
453 /**************************************************************************************************/
454
455 void Tree::randomTopology() {
456         try {
457                 for(int i=0;i<numNodes;i++){
458                         tree[i].setParent(-1);
459                 }
460                 for(int i=numLeaves;i<numNodes;i++){
461                         tree[i].setChildren(-1, -1); 
462                 }
463     
464                 for(int i=numLeaves;i<numNodes;i++){
465                         int escape =0;
466                         int rnd_index1, rnd_index2;
467                         while(escape == 0){
468                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
469                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
470                         }
471                 
472                         escape = 0;
473                         while(escape == 0){
474                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
475                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
476                                         escape = 1;
477                                 }               
478                         }
479                 
480                         tree[i].setChildren(rnd_index1,rnd_index2);
481                         tree[i].setParent(-1);
482                         tree[rnd_index1].setParent(i);
483                         tree[rnd_index2].setParent(i);
484                 }
485         }
486         catch(exception& e) {
487                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
488                 exit(1);
489         }
490         catch(...) {
491                 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
492                 exit(1);
493         }               
494 }
495
496 /*****************************************************************/
497 // This prints out the tree in Newick form.
498 void Tree::createNewickFile(string f) {
499         try {
500                 int root = findRoot();
501                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
502                 filename = f;
503                 openOutputFile(filename, out);
504                 
505                 printBranch(root);
506                 
507                 // you are at the end of the tree
508                 out << ";" << endl;
509                 out.close();
510         }
511         catch(exception& e) {
512                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
513                 exit(1);
514         }
515         catch(...) {
516                 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
517                 exit(1);
518         }               
519 }
520
521 /*****************************************************************/
522 //This function finds the index of the root node.
523
524 int Tree::findRoot() {
525         try {
526                 for (int i = 0; i < numNodes; i++) {
527                         //you found the root
528                         if (tree[i].getParent() == -1) { return i; }
529                 }
530                 return -1;
531         }
532         catch(exception& e) {
533                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
534                 exit(1);
535         }
536         catch(...) {
537                 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
538                 exit(1);
539         }               
540 }
541
542 /*****************************************************************/
543 void Tree::printBranch(int node) {
544         try {
545                 
546                 // you are not a leaf
547                 if (tree[node].getLChild() != -1) {
548                         out << "(";
549                         printBranch(tree[node].getLChild());
550                         out << ",";
551                         printBranch(tree[node].getRChild());
552                         out << ")";
553                 }else { //you are a leaf
554                         out << tree[node].getGroup() << ":" << tree[node].getBranchLength();
555                 }
556                 
557         }
558         catch(exception& e) {
559                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
560                 exit(1);
561         }
562         catch(...) {
563                 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
564                 exit(1);
565         }               
566 }
567
568 /*****************************************************************/
569
570 void Tree::printTree() {
571         
572         for(int i=0;i<numNodes;i++){
573                 cout << i << '\t';
574                 tree[i].printNode();
575         }
576         
577 }
578
579 /*****************************************************************/
580
581