]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
added logfile feature
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16                 globaldata = GlobalData::getInstance();
17                 
18                 if (globaldata->runParse == true) {  parseTreeFile();  globaldata->runParse = false;  }
19                 
20                 numLeaves = globaldata->Treenames.size();
21                 numNodes = 2*numLeaves - 1;
22                 
23                 tree.resize(numNodes);
24
25                 //initialize tree with correct number of nodes, name and group info.
26                 for (int i = 0; i < numNodes; i++) {
27                         //initialize leaf nodes
28                         if (i <= (numLeaves-1)) {
29                                 tree[i].setName(globaldata->Treenames[i]);
30                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
31                                 //set pcount and pGroup for groupname to 1.
32                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
33                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
34                                 //Treemap knows name, group and index to speed up search
35                                 globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
36         
37                         //intialize non leaf nodes
38                         }else if (i > (numLeaves-1)) {
39                                 tree[i].setName("");
40                                 tree[i].setGroup("");
41                         }
42                 }
43         }
44         catch(exception& e) {
45                 errorOut(e, "Tree", "Tree");
46                 exit(1);
47         }
48 }
49
50 /*****************************************************************/
51 Tree::~Tree() {}
52 /*****************************************************************/
53 int Tree::getIndex(string searchName) {
54         try {
55                 //Treemap knows name, group and index to speed up search
56                 // getIndex function will return the vector index or -1 if seq is not found.
57                 int index = globaldata->gTreemap->getIndex(searchName);
58                 return index;
59                 
60         }
61         catch(exception& e) {
62                 errorOut(e, "Tree", "getIndex");
63                 exit(1);
64         }
65 }
66 /*****************************************************************/
67
68 void Tree::setIndex(string searchName, int index) {
69         try {
70                 //set index in treemap
71                 globaldata->gTreemap->setIndex(searchName, index);
72         }
73         catch(exception& e) {
74                 errorOut(e, "Tree", "setIndex");
75                 exit(1);
76         }
77 }
78 /*****************************************************************/
79 void Tree::assembleTree() {
80         try {
81                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
82                 for (int i = numLeaves; i < numNodes; i++) {
83                         tree[i].pGroups = (mergeGroups(i));
84                         tree[i].pcount = (mergeGcounts(i));
85                 }
86         }
87         catch(exception& e) {
88                 errorOut(e, "Tree", "assembleTree");
89                 exit(1);
90         }
91 }
92 /*****************************************************************/
93 void Tree::getCopy(Tree* copy) {
94         try {
95         
96                 //for each node in the tree copy its info
97                 for (int i = 0; i < numNodes; i++) {
98                         //copy name
99                         tree[i].setName(copy->tree[i].getName());
100                 
101                         //copy group
102                         tree[i].setGroup(copy->tree[i].getGroup());
103                         
104                         //copy branch length
105                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
106                 
107                         //copy parent
108                         tree[i].setParent(copy->tree[i].getParent());
109                 
110                         //copy children
111                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
112                 
113                         //copy index in node and tmap
114                         tree[i].setIndex(copy->tree[i].getIndex());
115                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
116                         
117                         //copy pGroups
118                         tree[i].pGroups = copy->tree[i].pGroups;
119                 
120                         //copy pcount
121                         tree[i].pcount = copy->tree[i].pcount;
122                 }
123         }
124         catch(exception& e) {
125                 errorOut(e, "Tree", "getCopy");
126                 exit(1);
127         }
128 }
129 /*****************************************************************/
130 //returns a map with a groupname and the number of times that group was seen in the children
131 //for instance if your children are white and black then it would return a map with 2 entries
132 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
133 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
134
135 map<string, int> Tree::mergeGroups(int i) {
136         try {
137                 int lc = tree[i].getLChild();
138                 int rc = tree[i].getRChild();
139
140                 //set parsimony groups to left child
141                 map<string,int> parsimony = tree[lc].pGroups;
142                 
143                 int maxPars = 1;
144
145                 //look at right child groups and update maxPars if right child has something higher for that group.
146                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
147                         it2 = parsimony.find(it->first);
148                         if (it2 != parsimony.end()) {
149                                 parsimony[it->first]++;
150                         }else {
151                                 parsimony[it->first] = 1;
152                         }
153                         
154                         if(parsimony[it->first] > maxPars){
155                                 maxPars = parsimony[it->first];
156                         }
157                 }
158         
159                 // this is true if right child had a greater parsimony for a certain group
160                 if(maxPars > 1){
161                         //erase all the groups that are only 1 because you found something with 2.
162                         for(it=parsimony.begin();it!=parsimony.end();it++){
163                                 if(it->second == 1){
164                                         parsimony.erase(it->first);
165 //                                      it--;
166                                 }
167                         }
168                         //set one remaining groups to 1
169                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
170                         for(it=parsimony.begin();it!=parsimony.end();it++){
171                                 parsimony[it->first] = 1;
172                         }
173                 
174                 }
175         
176                 return parsimony;
177         }
178         catch(exception& e) {
179                 errorOut(e, "Tree", "mergeGroups");
180                 exit(1);
181         }
182 }
183 /*****************************************************************/
184 //returns a map with a groupname and the number of times that group was seen in the children
185 //for instance if your children are white and black then it would return a map with 2 entries
186 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
187 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
188
189 map<string, int> Tree::mergeUserGroups(int i, vector<string> g) {
190         try {
191         
192                 int lc = tree[i].getLChild();
193                 int rc = tree[i].getRChild();
194                 
195                 //loop through nodes groups removing the ones the user doesn't want
196                 for (it = tree[lc].pGroups.begin(); it != tree[lc].pGroups.end(); it++) {
197                         if (inUsersGroups(it->first, g) != true) { tree[lc].pGroups.erase(it->first); }
198                 }
199                 
200                 //loop through nodes groups removing the ones the user doesn't want
201                 for (it = tree[rc].pGroups.begin(); it != tree[rc].pGroups.end(); it++) {
202                         if (inUsersGroups(it->first, g) != true) { tree[rc].pGroups.erase(it->first); }
203                 }
204
205                 //set parsimony groups to left child
206                 map<string,int> parsimony = tree[lc].pGroups;
207                 
208                 int maxPars = 1;
209
210                 //look at right child groups and update maxPars if right child has something higher for that group.
211                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
212                         it2 = parsimony.find(it->first);
213                         if (it2 != parsimony.end()) {
214                                 parsimony[it->first]++;
215                         }else {
216                                 parsimony[it->first] = 1;
217                         }
218                         
219                         if(parsimony[it->first] > maxPars){
220                                 maxPars = parsimony[it->first];
221                         }
222                 }
223                         
224                 // this is true if right child had a greater parsimony for a certain group
225                 if(maxPars > 1){
226                         //erase all the groups that are only 1 because you found something with 2.
227                         for(it=parsimony.begin();it!=parsimony.end();it++){
228                                 if(it->second == 1){
229                                         parsimony.erase(it->first);
230                                 }
231                         }
232                         for(it=parsimony.begin();it!=parsimony.end();it++){
233                                 parsimony[it->first] = 1;
234                         }
235                 }               
236                 
237                 return parsimony;
238         }
239         catch(exception& e) {
240                 errorOut(e, "Tree", "mergeUserGroups");
241                 exit(1);
242         }
243 }
244
245
246 /**************************************************************************************************/
247
248 map<string,int> Tree::mergeGcounts(int position) {
249         try{
250                 map<string,int>::iterator pos;
251         
252                 int lc = tree[position].getLChild();
253                 int rc = tree[position].getRChild();
254         
255                 map<string,int> sum = tree[lc].pcount;
256     
257                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
258                         sum[it->first] += it->second;
259                 }
260                 return sum;
261         }
262         catch(exception& e) {
263                 errorOut(e, "Tree", "mergeGcounts");
264                 exit(1);
265         }
266 }
267 /**************************************************************************************************/
268
269 void Tree::randomLabels(vector<string> g) {
270         try {
271                 
272                 for(int i = 0; i < numLeaves; i++){
273                         int z;
274                         //get random index to switch with
275                         z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
276                         
277                         //you only want to randomize the nodes that are from a group the user wants analyzed, so
278                         //if either of the leaf nodes you are about to switch are not in the users groups then you don't want to switch them.
279                         bool treez, treei;
280                 
281                         treez = inUsersGroups(tree[z].getGroup(), g);
282                         treei = inUsersGroups(tree[i].getGroup(), g);
283                         
284                         if ((treez == true) && (treei == true)) {
285                                 //switches node i and node z's info.
286                                 map<string,int> lib_hold = tree[z].pGroups;
287                                 tree[z].pGroups = (tree[i].pGroups);
288                                 tree[i].pGroups = (lib_hold);
289                                 
290                                 string zgroup = tree[z].getGroup();
291                                 tree[z].setGroup(tree[i].getGroup());
292                                 tree[i].setGroup(zgroup);
293                                 
294                                 string zname = tree[z].getName();
295                                 tree[z].setName(tree[i].getName());
296                                 tree[i].setName(zname);
297                                 
298                                 map<string,int> gcount_hold = tree[z].pcount;
299                                 tree[z].pcount = (tree[i].pcount);
300                                 tree[i].pcount = (gcount_hold);
301                         }
302                 }
303         }
304         catch(exception& e) {
305                 errorOut(e, "Tree", "randomLabels");
306                 exit(1);
307         }
308 }
309 /**************************************************************************************************/
310
311 void Tree::randomLabels(string groupA, string groupB) {
312         try {
313                 int numSeqsA = globaldata->gTreemap->seqsPerGroup[groupA];
314                 int numSeqsB = globaldata->gTreemap->seqsPerGroup[groupB];
315
316                 vector<string> randomGroups(numSeqsA+numSeqsB, groupA);
317                 for(int i=numSeqsA;i<randomGroups.size();i++){
318                         randomGroups[i] = groupB;
319                 }
320                 random_shuffle(randomGroups.begin(), randomGroups.end());
321                                 
322                 int randomCounter = 0;                          
323                 for(int i=0;i<numLeaves;i++){
324                         if(tree[i].getGroup() == groupA || tree[i].getGroup() == groupB){
325                                 tree[i].setGroup(randomGroups[randomCounter]);
326                                 tree[i].pcount.clear();
327                                 tree[i].pcount[randomGroups[randomCounter]] = 1;
328                                 tree[i].pGroups.clear();
329                                 tree[i].pGroups[randomGroups[randomCounter]] = 1;
330                                 randomCounter++;
331                         }
332                 }
333         }               
334         catch(exception& e) {
335                 errorOut(e, "Tree", "randomLabels");
336                 exit(1);
337         }
338 }
339 /**************************************************************************************************/
340 void Tree::randomBlengths()  {
341         try {
342                 for(int i=numNodes-1;i>=0;i--){
343                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
344                 
345                         float bl_hold = tree[z].getBranchLength();
346                         tree[z].setBranchLength(tree[i].getBranchLength());
347                         tree[i].setBranchLength(bl_hold);
348                 }
349         }
350         catch(exception& e) {
351                 errorOut(e, "Tree", "randomBlengths");
352                 exit(1);
353         }
354 }
355 /*************************************************************************************************/
356 void Tree::assembleRandomUnifracTree(vector<string> g) {
357         randomLabels(g);
358         assembleTree();
359 }
360 /*************************************************************************************************/
361 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
362         randomLabels(groupA, groupB);
363         assembleTree();
364 }
365
366 /*************************************************************************************************/
367 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
368 void Tree::assembleRandomTree() {
369         randomTopology();
370         assembleTree();
371 }
372 /**************************************************************************************************/
373
374 void Tree::randomTopology() {
375         try {
376                 for(int i=0;i<numNodes;i++){
377                         tree[i].setParent(-1);
378                 }
379                 for(int i=numLeaves;i<numNodes;i++){
380                         tree[i].setChildren(-1, -1); 
381                 }
382     
383                 for(int i=numLeaves;i<numNodes;i++){
384                         int escape =0;
385                         int rnd_index1, rnd_index2;
386                         while(escape == 0){
387                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
388                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
389                         }
390                 
391                         escape = 0;
392                         while(escape == 0){
393                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
394                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
395                                         escape = 1;
396                                 }               
397                         }
398                 
399                         tree[i].setChildren(rnd_index1,rnd_index2);
400                         tree[i].setParent(-1);
401                         tree[rnd_index1].setParent(i);
402                         tree[rnd_index2].setParent(i);
403                 }
404         }
405         catch(exception& e) {
406                 errorOut(e, "Tree", "randomTopology");
407                 exit(1);
408         }
409 }
410 /*****************************************************************/
411 void Tree::print(ostream& out) {
412         try {
413                 int root = findRoot();
414                 printBranch(root, out, "branch");
415                 out << ";" << endl;
416         }
417         catch(exception& e) {
418                 errorOut(e, "Tree", "print");
419                 exit(1);
420         }
421 }
422 /*****************************************************************/
423 void Tree::printForBoot(ostream& out) {
424         try {
425                 int root = findRoot();
426                 printBranch(root, out, "boot");
427                 out << ";" << endl;
428         }
429         catch(exception& e) {
430                 errorOut(e, "Tree", "printForBoot");
431                 exit(1);
432         }
433 }
434
435 /*****************************************************************/
436 // This prints out the tree in Newick form.
437 void Tree::createNewickFile(string f) {
438         try {
439                 int root = findRoot();
440                 //filename = getRootName(globaldata->getTreeFile()) + "newick";
441                 filename = f;
442
443                 openOutputFile(filename, out);
444                 
445                 printBranch(root, out, "branch");
446                 
447                 // you are at the end of the tree
448                 out << ";" << endl;
449                 out.close();
450         }
451         catch(exception& e) {
452                 errorOut(e, "Tree", "createNewickFile");
453                 exit(1);
454         }
455 }
456
457 /*****************************************************************/
458 //This function finds the index of the root node.
459
460 int Tree::findRoot() {
461         try {
462                 for (int i = 0; i < numNodes; i++) {
463                         //you found the root
464                         if (tree[i].getParent() == -1) { return i; }
465                         //cout << "i = " << i << endl;
466                         //cout << "i's parent = " << tree[i].getParent() << endl;  
467                 }
468                 return -1;
469         }
470         catch(exception& e) {
471                 errorOut(e, "Tree", "findRoot");
472                 exit(1);
473         }
474 }
475
476 /*****************************************************************/
477 void Tree::printBranch(int node, ostream& out, string mode) {
478         try {
479                 
480                 // you are not a leaf
481                 if (tree[node].getLChild() != -1) {
482                         out << "(";
483                         printBranch(tree[node].getLChild(), out, mode);
484                         out << ",";
485                         printBranch(tree[node].getRChild(), out, mode);
486                         out << ")";
487                         if (mode == "branch") {
488                                 //if there is a branch length then print it
489                                 if (tree[node].getBranchLength() != -1) {
490                                         out << ":" << tree[node].getBranchLength();
491                                 }
492                         }else if (mode == "boot") {
493                                 //if there is a label then print it
494                                 if (tree[node].getLabel() != -1) {
495                                         out << tree[node].getLabel();
496                                 }
497                         }
498                 }else { //you are a leaf
499                         out << tree[node].getGroup(); 
500                         if (mode == "branch") {
501                                 //if there is a branch length then print it
502                                 if (tree[node].getBranchLength() != -1) {
503                                         out << ":" << tree[node].getBranchLength();
504                                 }
505                         }else if (mode == "boot") {
506                                 //if there is a label then print it
507                                 if (tree[node].getLabel() != -1) {
508                                         out << tree[node].getLabel();
509                                 }
510                         }
511                 }
512                 
513         }
514         catch(exception& e) {
515                 errorOut(e, "Tree", "printBranch");
516                 exit(1);
517         }
518 }
519
520 /*****************************************************************/
521
522 void Tree::printTree() {
523         
524         for(int i=0;i<numNodes;i++){
525                 cout << i << '\t';
526                 tree[i].printNode();
527         }
528         
529 }
530
531 /*****************************************************************/
532
533 void Tree::parseTreeFile() {
534         
535         //only takes names from the first tree and assumes that all trees use the same names.
536         try {
537                 string filename = globaldata->getTreeFile();
538                 ifstream filehandle;
539                 openInputFile(filename, filehandle);
540                 int c, comment;
541                 comment = 0;
542                 
543                 //ifyou are not a nexus file 
544                 if((c = filehandle.peek()) != '#') {  
545                         while((c = filehandle.peek()) != ';') { 
546                                 while ((c = filehandle.peek()) != ';') {
547                                         // get past comments
548                                         if(c == '[') {
549                                                 comment = 1;
550                                         }
551                                         if(c == ']'){
552                                                 comment = 0;
553                                         }
554                                         if((c == '(') && (comment != 1)){ break; }
555                                         filehandle.get();
556                                 }
557
558                                 readTreeString(filehandle); 
559                         }
560                 //ifyou are a nexus file
561                 }else if((c = filehandle.peek()) == '#') {
562                         string holder = "";
563                                         
564                         // get past comments
565                         while(holder != "translate" && holder != "Translate"){  
566                                 if(holder == "[" || holder == "[!"){
567                                         comment = 1;
568                                 }
569                                 if(holder == "]"){
570                                         comment = 0;
571                                 }
572                                 filehandle >> holder; 
573         
574                                 //ifthere is no translate then you must read tree string otherwise use translate to get names
575                                 if(holder == "tree" && comment != 1){   
576                                         //pass over the "tree rep.6878900 = "
577                                         while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF)) {;}
578
579                                         if(c == EOF) { break; }
580                                         filehandle.putback(c);  //put back first ( of tree.
581                                         readTreeString(filehandle);     
582                                         break;
583                                 }
584                         }
585                         
586                         //use nexus translation rather than parsing tree to save time
587                         if((holder == "translate") || (holder == "Translate")) {
588
589                                 string number, name, h;
590                                 h = ""; // so it enters the loop the first time
591                                 while((h != ";") && (number != ";")) { 
592                                         filehandle >> number;
593                                         filehandle >> name;
594         
595                                         //c = , until done with translation then c = ;
596                                         h = name.substr(name.length()-1, name.length()); 
597                                         name.erase(name.end()-1);  //erase the comma
598                                         globaldata->Treenames.push_back(number);
599                                 }
600                                 if(number == ";") { globaldata->Treenames.pop_back(); }  //in case ';' from translation is on next line instead of next to last name
601                         }
602                 }
603                 filehandle.close();
604         }
605         catch(exception& e) {
606                 errorOut(e, "Tree", "parseTreeFile");
607                 exit(1);
608         }
609 }
610 /*******************************************************/
611
612 /*******************************************************/
613 void Tree::readTreeString(ifstream& filehandle) {
614         try {
615                 int c;
616                 string name;// k;
617                 
618                 while((c = filehandle.peek()) != ';') { 
619 //k = c;
620 //cout << " at beginning of while " <<  k << endl;                      
621                         if(c == ')')  {    
622                                 //to pass over labels in trees
623                                 c=filehandle.get();
624                                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
625                                 filehandle.putback(c);
626                         }
627                         if(c == ';') { break; }
628                         //if you are a name
629                         if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
630                                 name = "";
631                                 c = filehandle.get();
632                         //k = c;
633 //cout << k << endl;
634                                 while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {                       
635                                         name += c;
636                                         c = filehandle.get();
637                         //k = c;
638 //cout << " in name while " << k << endl;
639                                 }
640                                 
641 //cout << "name = " << name << endl;
642                                 globaldata->Treenames.push_back(name);
643                                 filehandle.putback(c);
644 //k = c;
645 //cout << " after putback" <<  k << endl;
646                         } 
647                         
648                         if(c  == ':') { //read until you reach the end of the branch length
649                                 while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
650                                         c = filehandle.get();
651                                         //k = c;
652         //cout << " in branch while " << k << endl;
653                                 }
654                                 filehandle.putback(c);
655                         }
656                         
657                         c = filehandle.get();
658                         if(c == ';') { break; }
659                         if(c == ')') { filehandle.putback(c); }
660         //              k = c;
661 //cout << k << endl;
662
663                 }
664         }
665         catch(exception& e) {
666                 errorOut(e, "Tree", "readTreeString");
667                 exit(1);
668         }
669 }       
670
671 /*******************************************************/
672
673 /*******************************************************/
674