]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
bugs fixes while testing for 1.5 release
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 if (globaldata->runParse == true) {  parseTreeFile();  globaldata->runParse = false;  }
19 //for(int i = 0; i <    globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl;  }  
20                 numLeaves = globaldata->Treenames.size();
21                 numNodes = 2*numLeaves - 1;
22                 
23                 tree.resize(numNodes);
24
25                 //initialize tree with correct number of nodes, name and group info.
26                 for (int i = 0; i < numNodes; i++) {
27                         //initialize leaf nodes
28                         if (i <= (numLeaves-1)) {
29                                 tree[i].setName(globaldata->Treenames[i]);
30                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
31                                 //set pcount and pGroup for groupname to 1.
32                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
33                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
34                                 //Treemap knows name, group and index to speed up search
35                                 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
36         
37                         //intialize non leaf nodes
38                         }else if (i > (numLeaves-1)) {
39                                 tree[i].setName("");
40                                 tree[i].setGroup("");
41                         }
42                 }
43         }
44         catch(exception& e) {
45                 errorOut(e, "Tree", "Tree");
46                 exit(1);
47         }
48 }
49
50 /*****************************************************************/
51 Tree::~Tree() {}
52 /*****************************************************************/
53 int Tree::getIndex(string searchName) {
54         try {
55                 //Treemap knows name, group and index to speed up search
56                 // getIndex function will return the vector index or -1 if seq is not found.
57                 int index = globaldata->gTreemap->getIndex(searchName);
58                 return index;
59                 
60         }
61         catch(exception& e) {
62                 errorOut(e, "Tree", "getIndex");
63                 exit(1);
64         }
65 }
66 /*****************************************************************/
67
68 void Tree::setIndex(string searchName, int index) {
69         try {
70                 //set index in treemap
71                 globaldata->gTreemap->setIndex(searchName, index);
72         }
73         catch(exception& e) {
74                 errorOut(e, "Tree", "setIndex");
75                 exit(1);
76         }
77 }
78 /*****************************************************************/
79 void Tree::assembleTree() {
80         try {
81                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
82                 for (int i = numLeaves; i < numNodes; i++) {
83                         tree[i].pGroups = (mergeGroups(i));
84                         tree[i].pcount = (mergeGcounts(i));
85                 }
86         }
87         catch(exception& e) {
88                 errorOut(e, "Tree", "assembleTree");
89                 exit(1);
90         }
91 }
92 /*****************************************************************/
93 void Tree::getCopy(Tree* copy) {
94         try {
95         
96                 //for each node in the tree copy its info
97                 for (int i = 0; i < numNodes; i++) {
98                         //copy name
99                         tree[i].setName(copy->tree[i].getName());
100                 
101                         //copy group
102                         tree[i].setGroup(copy->tree[i].getGroup());
103                         
104                         //copy branch length
105                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
106                 
107                         //copy parent
108                         tree[i].setParent(copy->tree[i].getParent());
109                 
110                         //copy children
111                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
112                 
113                         //copy index in node and tmap
114                         tree[i].setIndex(copy->tree[i].getIndex());
115                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
116                         
117                         //copy pGroups
118                         tree[i].pGroups = copy->tree[i].pGroups;
119                 
120                         //copy pcount
121                         tree[i].pcount = copy->tree[i].pcount;
122                 }
123         }
124         catch(exception& e) {
125                 errorOut(e, "Tree", "getCopy");
126                 exit(1);
127         }
128 }
129 /*****************************************************************/
130 //returns a map with a groupname and the number of times that group was seen in the children
131 //for instance if your children are white and black then it would return a map with 2 entries
132 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
133 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
134
135 map<string, int> Tree::mergeGroups(int i) {
136         try {
137                 int lc = tree[i].getLChild();
138                 int rc = tree[i].getRChild();
139
140                 //set parsimony groups to left child
141                 map<string,int> parsimony = tree[lc].pGroups;
142                 
143                 int maxPars = 1;
144
145                 //look at right child groups and update maxPars if right child has something higher for that group.
146                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
147                         it2 = parsimony.find(it->first);
148                         if (it2 != parsimony.end()) {
149                                 parsimony[it->first]++;
150                         }else {
151                                 parsimony[it->first] = 1;
152                         }
153                         
154                         if(parsimony[it->first] > maxPars){
155                                 maxPars = parsimony[it->first];
156                         }
157                 }
158         
159                 // this is true if right child had a greater parsimony for a certain group
160                 if(maxPars > 1){
161                         //erase all the groups that are only 1 because you found something with 2.
162                         for(it=parsimony.begin();it!=parsimony.end();){
163                                 if(it->second == 1){
164                                         parsimony.erase(it++);
165                                 }else { it++; }
166                         }
167                         //set one remaining groups to 1
168                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
169                         for(it=parsimony.begin();it!=parsimony.end();it++){
170                                 parsimony[it->first] = 1;
171                         }
172                 
173                 }
174         
175                 return parsimony;
176         }
177         catch(exception& e) {
178                 errorOut(e, "Tree", "mergeGroups");
179                 exit(1);
180         }
181 }
182 /*****************************************************************/
183 //returns a map with a groupname and the number of times that group was seen in the children
184 //for instance if your children are white and black then it would return a map with 2 entries
185 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
186 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
187
188 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
189         try {
190         
191                 int lc = tree[i].getLChild();
192                 int rc = tree[i].getRChild();
193                 
194                 //loop through nodes groups removing the ones the user doesn't want
195                 for(it=tree[lc].pGroups.begin();it!=tree[lc].pGroups.end();){
196                                 if (inUsersGroups(it->first, g) != true) {
197                                         tree[lc].pGroups.erase(it++);
198                                 }else { it++; }
199                 }
200
201                 //loop through nodes groups removing the ones the user doesn't want
202                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();){
203                                 if (inUsersGroups(it->first, g) != true) {
204                                         tree[rc].pGroups.erase(it++);
205                                 }else { it++; }
206                 }
207
208                 //set parsimony groups to left child
209                 map<string,int> parsimony = tree[lc].pGroups;
210                 
211                 int maxPars = 1;
212
213                 //look at right child groups and update maxPars if right child has something higher for that group.
214                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
215                         it2 = parsimony.find(it->first);
216                         if (it2 != parsimony.end()) {
217                                 parsimony[it->first]++;
218                         }else {
219                                 parsimony[it->first] = 1;
220                         }
221                         
222                         if(parsimony[it->first] > maxPars){
223                                 maxPars = parsimony[it->first];
224                         }
225                 }
226                         
227                 // this is true if right child had a greater parsimony for a certain group
228                 if(maxPars > 1){
229                         //erase all the groups that are only 1 because you found something with 2.
230                         for(it=parsimony.begin();it!=parsimony.end();){
231                                 if(it->second == 1){
232                                         parsimony.erase(it++);
233                                 }else { it++; }
234                         }
235
236                         for(it=parsimony.begin();it!=parsimony.end();it++){
237                                 parsimony[it->first] = 1;
238                         }
239                 }               
240                 
241                 return parsimony;
242         }
243         catch(exception& e) {
244                 errorOut(e, "Tree", "mergeUserGroups");
245                 exit(1);
246         }
247 }
248
249
250 /**************************************************************************************************/
251
252 map<string,int> Tree::mergeGcounts(int position) {
253         try{
254                 map<string,int>::iterator pos;
255         
256                 int lc = tree[position].getLChild();
257                 int rc = tree[position].getRChild();
258         
259                 map<string,int> sum = tree[lc].pcount;
260     
261                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
262                         sum[it->first] += it->second;
263                 }
264                 return sum;
265         }
266         catch(exception& e) {
267                 errorOut(e, "Tree", "mergeGcounts");
268                 exit(1);
269         }
270 }
271 /**************************************************************************************************/
272
273 void Tree::randomLabels(vector<string> g) {
274         try {
275                 
276                 for(int i = 0; i < numLeaves; i++){
277                         int z;
278                         //get random index to switch with
279                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
280                         
281                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
282                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
283                         bool treez, treei;
284                 
285                         treez = inUsersGroups(tree[z].getGroup(), g);
286                         treei = inUsersGroups(tree[i].getGroup(), g);
287                         
288                         if ((treez == true) && (treei == true)) {
289                                 //switches node i and node z's info.
290                                 map<string,int> lib_hold = tree[z].pGroups;
291                                 tree[z].pGroups = (tree[i].pGroups);
292                                 tree[i].pGroups = (lib_hold);
293                                 
294                                 string zgroup = tree[z].getGroup();
295                                 tree[z].setGroup(tree[i].getGroup());
296                                 tree[i].setGroup(zgroup);
297                                 
298                                 string zname = tree[z].getName();
299                                 tree[z].setName(tree[i].getName());
300                                 tree[i].setName(zname);
301                                 
302                                 map<string,int> gcount_hold = tree[z].pcount;
303                                 tree[z].pcount = (tree[i].pcount);
304                                 tree[i].pcount = (gcount_hold);
305                         }
306                 }
307         }
308         catch(exception& e) {
309                 errorOut(e, "Tree", "randomLabels");
310                 exit(1);
311         }
312 }
313 /**************************************************************************************************/
314
315 void Tree::randomLabels(string groupA, string groupB) {
316         try {
317                 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
318                 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
319
320                 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
321                 for(int i=numSeqsA;i<randomGroups.size();i++){
322                         randomGroups[i] = groupB;
323                 }
324                 random_shuffle(randomGroups.begin(), randomGroups.end());
325                                 
326                 int randomCounter = 0;                          
327                 for(int i=0;i<numLeaves;i++){
328                         if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
329                                 tree[i].setGroup(randomGroups[randomCounter]);
330                                 tree[i].pcount.clear();
331                                 tree[i].pcount[randomGroups[randomCounter]] = 1;
332                                 tree[i].pGroups.clear();
333                                 tree[i].pGroups[randomGroups[randomCounter]] = 1;
334                                 randomCounter++;
335                         }
336                 }
337         }               
338         catch(exception& e) {
339                 errorOut(e, "Tree", "randomLabels");
340                 exit(1);
341         }
342 }
343 /**************************************************************************************************/
344 void Tree::randomBlengths()  {
345         try {
346                 for(int i=numNodes-1;i>=0;i--){
347                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
348                 
349                         float bl_hold = tree[z].getBranchLength();
350                         tree[z].setBranchLength(tree[i].getBranchLength());
351                         tree[i].setBranchLength(bl_hold);
352                 }
353         }
354         catch(exception& e) {
355                 errorOut(e, "Tree", "randomBlengths");
356                 exit(1);
357         }
358 }
359 /*************************************************************************************************/
360 void Tree::assembleRandomUnifracTree(vector<string> g) {
361         randomLabels(g);
362         assembleTree();
363 }
364 /*************************************************************************************************/
365 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
366         randomLabels(groupA, groupB);
367         assembleTree();
368 }
369
370 /*************************************************************************************************/
371 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
372 void Tree::assembleRandomTree() {
373         randomTopology();
374         assembleTree();
375 }
376 /**************************************************************************************************/
377
378 void Tree::randomTopology() {
379         try {
380                 for(int i=0;i<numNodes;i++){
381                         tree[i].setParent(-1);
382                 }
383                 for(int i=numLeaves;i<numNodes;i++){
384                         tree[i].setChildren(-1, -1); 
385                 }
386     
387                 for(int i=numLeaves;i<numNodes;i++){
388                         int escape =0;
389                         int rnd_index1, rnd_index2;
390                         while(escape == 0){
391                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
392                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
393                         }
394                 
395                         escape = 0;
396                         while(escape == 0){
397                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
398                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
399                                         escape = 1;
400                                 }               
401                         }
402                 
403                         tree[i].setChildren(rnd_index1,rnd_index2);
404                         tree[i].setParent(-1);
405                         tree[rnd_index1].setParent(i);
406                         tree[rnd_index2].setParent(i);
407                 }
408         }
409         catch(exception& e) {
410                 errorOut(e, "Tree", "randomTopology");
411                 exit(1);
412         }
413 }
414 /*****************************************************************/
415 void Tree::print(ostream& out) {
416         try {
417                 int root = findRoot();
418                 printBranch(root, out, "branch");
419                 out << ";" << endl;
420         }
421         catch(exception& e) {
422                 errorOut(e, "Tree", "print");
423                 exit(1);
424         }
425 }
426 /*****************************************************************/
427 void Tree::printForBoot(ostream& out) {
428         try {
429                 int root = findRoot();
430                 printBranch(root, out, "boot");
431                 out << ";" << endl;
432         }
433         catch(exception& e) {
434                 errorOut(e, "Tree", "printForBoot");
435                 exit(1);
436         }
437 }
438
439 /*****************************************************************/
440 // This prints out the tree in Newick form.
441 void Tree::createNewickFile(string f) {
442         try {
443                 int root = findRoot();
444                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
445                 filename = f;
446
447                 openOutputFile(filename, out);
448                 
449                 printBranch(root, out, "branch");
450                 
451                 // you are at the end of the tree
452                 out << ";" << endl;
453                 out.close();
454         }
455         catch(exception& e) {
456                 errorOut(e, "Tree", "createNewickFile");
457                 exit(1);
458         }
459 }
460
461 /*****************************************************************/
462 //This function finds the index of the root node.
463
464 int Tree::findRoot() {
465         try {
466                 for (int i = 0; i < numNodes; i++) {
467                         //you found the root
468                         if (tree[i].getParent() == -1) { return i; }
469                         //cout << "i = " << i << endl;
470                         //cout << "i's parent = " << tree[i].getParent() << endl;  
471                 }
472                 return -1;
473         }
474         catch(exception& e) {
475                 errorOut(e, "Tree", "findRoot");
476                 exit(1);
477         }
478 }
479
480 /*****************************************************************/
481 void Tree::printBranch(int node, ostream& out, string mode) {
482         try {
483                 
484                 // you are not a leaf
485                 if (tree[node].getLChild() != -1) {
486                         out << "(";
487                         printBranch(tree[node].getLChild(), out, mode);
488                         out << ",";
489                         printBranch(tree[node].getRChild(), out, mode);
490                         out << ")";
491                         if (mode == "branch") {
492                                 //if there is a branch length then print it
493                                 if (tree[node].getBranchLength() != -1) {
494                                         out << ":" << tree[node].getBranchLength();
495                                 }
496                         }else if (mode == "boot") {
497                                 //if there is a label then print it
498                                 if (tree[node].getLabel() != -1) {
499                                         out << tree[node].getLabel();
500                                 }
501                         }
502                 }else { //you are a leaf
503                         out << tree[node].getGroup(); 
504                         if (mode == "branch") {
505                                 //if there is a branch length then print it
506                                 if (tree[node].getBranchLength() != -1) {
507                                         out << ":" << tree[node].getBranchLength();
508                                 }
509                         }else if (mode == "boot") {
510                                 //if there is a label then print it
511                                 if (tree[node].getLabel() != -1) {
512                                         out << tree[node].getLabel();
513                                 }
514                         }
515                 }
516                 
517         }
518         catch(exception& e) {
519                 errorOut(e, "Tree", "printBranch");
520                 exit(1);
521         }
522 }
523
524 /*****************************************************************/
525
526 void Tree::printTree() {
527         
528         for(int i=0;i<numNodes;i++){
529                 cout << i << '\t';
530                 tree[i].printNode();
531         }
532         
533 }
534
535 /*****************************************************************/
536 //this code is a mess and should be rethought...-slw
537 void Tree::parseTreeFile() {
538         
539         //only takes names from the first tree and assumes that all trees use the same names.
540         try {
541                 string filename = globaldata->getTreeFile();
542                 ifstream filehandle;
543                 openInputFile(filename, filehandle);
544                 int c, comment;
545                 comment = 0;
546                 int done = 1;
547                 
548                 //ifyou are not a nexus file 
549                 if((c = filehandle.peek()) != '#') {  
550                         while((c = filehandle.peek()) != ';') { 
551                                 while ((c = filehandle.peek()) != ';') {
552                                         // get past comments
553                                         if(c == '[') {
554                                                 comment = 1;
555                                         }
556                                         if(c == ']'){
557                                                 comment = 0;
558                                         }
559                                         if((c == '(') && (comment != 1)){ break; }
560                                         filehandle.get();
561                                 }
562
563                                 done = readTreeString(filehandle); 
564                                 if (done == 0) { break; }
565                         }
566                 //ifyou are a nexus file
567                 }else if((c = filehandle.peek()) == '#') {
568                         string holder = "";
569                                         
570                         // get past comments
571                         while(holder != "translate" && holder != "Translate"){  
572                                 if(holder == "[" || holder == "[!"){
573                                         comment = 1;
574                                 }
575                                 if(holder == "]"){
576                                         comment = 0;
577                                 }
578                                 filehandle >> holder; 
579
580                                 //if there is no translate then you must read tree string otherwise use translate to get names
581                                 if((holder == "tree") && (comment != 1)){       
582                                         //pass over the "tree rep.6878900 = "
583                                         while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
584
585                                         if(c == EOF) { break; }
586                                         filehandle.putback(c);  //put back first ( of tree.
587                                         done = readTreeString(filehandle);
588         
589                                         break;
590                                 }
591                         
592                                 if (done == 0) { break;  }
593                         }
594                         
595                         //use nexus translation rather than parsing tree to save time
596                         if((holder == "translate") || (holder == "Translate")) {
597
598                                 string number, name, h;
599                                 h = ""; // so it enters the loop the first time
600                                 while((h != ";") && (number != ";")) { 
601                                         filehandle >> number;
602                                         filehandle >> name;
603         
604                                         //c = , until done with translation then c = ;
605                                         h = name.substr(name.length()-1, name.length()); 
606                                         name.erase(name.end()-1);  //erase the comma
607                                         globaldata->Treenames.push_back(number);
608                                 }
609                                 if(number == ";") { globaldata->Treenames.pop_back(); }  //in case ';' from translation is on next line instead of next to last name
610                         }
611                 }
612                 filehandle.close();
613         }
614         catch(exception& e) {
615                 errorOut(e, "Tree", "parseTreeFile");
616                 exit(1);
617         }
618 }
619 /*******************************************************/
620
621 /*******************************************************/
622 int Tree::readTreeString(ifstream& filehandle)  {
623         try {
624                 int c;
625                 string name;  //, k
626                 
627                 while((c = filehandle.peek()) != ';') { 
628 //k = c;
629 //cout << " at beginning of while " <<  k << endl;                      
630                         if(c == ')')  {    
631                                 //to pass over labels in trees
632                                 c=filehandle.get();
633                                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
634                                 filehandle.putback(c);
635                         }
636                         if(c == ';') { return 0; }
637                         if(c == -1) { return 0; }
638                         //if you are a name
639                         if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
640                                 name = "";
641                                 c = filehandle.get();
642                         //k = c;
643 //cout << k << endl;
644                                 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {                       
645                                         name += c;
646                                         c = filehandle.get();
647                         //k = c;
648 //cout << " in name while " << k << endl;
649                                 }
650                                 
651 //cout << "name = " << name << endl;
652                                 globaldata->Treenames.push_back(name);
653                                 filehandle.putback(c);
654 //k = c;
655 //cout << " after putback" <<  k << endl;
656                         } 
657                         
658                         if(c  == ':') { //read until you reach the end of the branch length
659                                 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
660                                         c = filehandle.get();
661         //k = c;
662         //cout << " in branch while " << k << endl;
663                                 }
664                                 filehandle.putback(c);
665                         }
666                 
667                         c = filehandle.get();
668 //k = c;
669         //cout << " here after get " << k << endl;
670                         if(c == ';') { return 0; }
671                         if(c == ')') { filehandle.putback(c); }
672         //k = c;
673 //cout << k << endl;
674
675                 }
676                 return 0;
677         }
678         catch(exception& e) {
679                 errorOut(e, "Tree", "readTreeString");
680                 exit(1);
681         }
682 }       
683
684 /*******************************************************/
685
686 /*******************************************************/
687