]> git.donarmstrong.com Git - mothur.git/blob - tree.cpp
adding treeclimber and unifrac pieces
[mothur.git] / tree.cpp
1 /*
2  *  tree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "tree.h"
11
12
13 /*****************************************************************/
14 Tree::Tree() {
15         try {
16         
17                 globaldata = GlobalData::getInstance();
18                 numLeaves = globaldata->gTreemap->getNumSeqs();
19                 numNodes = 2*numLeaves - 1;
20                 
21                 tree.resize(numNodes);
22
23                 //initialize tree with correct number of nodes, name and group info.
24                 for (int i = 0; i < numNodes; i++) {
25                         //initialize leaf nodes
26                         if (i <= (numLeaves-1)) {
27                                 tree[i].setName(globaldata->gTreemap->namesOfSeqs[i]);
28                                 tree[i].setGroup(globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i]));
29                                 //the node knows its index
30                                 tree[i].setIndex(i);
31                                 //set pcount and pGroup for groupname to 1.
32                                 tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
33                                 tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->gTreemap->namesOfSeqs[i])] = 1;
34                                 //Treemap knows name, group and index to speed up search
35                                 globaldata->gTreemap->setIndex(globaldata->gTreemap->namesOfSeqs[i], i);
36         
37                         //intialize non leaf nodes
38                         }else if (i > (numLeaves-1)) {
39                                 tree[i].setName("");
40                                 tree[i].setGroup("");
41                                 //the node knows its index
42                                 tree[i].setIndex(i);
43                         }
44                 }
45         }
46         catch(exception& e) {
47                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
48                 exit(1);
49         }
50         catch(...) {
51                 cout << "An unknown error has occurred in the Tree class function Tree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52                 exit(1);
53         }               
54 }
55
56 /*****************************************************************/
57
58 int Tree::getIndex(string searchName) {
59         try {
60                 //Treemap knows name, group and index to speed up search
61                 // getIndex function will return the vector index or -1 if seq is not found.
62                 int index = globaldata->gTreemap->getIndex(searchName);
63                 return index;
64                 
65         }
66         catch(exception& e) {
67                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
68                 exit(1);
69         }
70         catch(...) {
71                 cout << "An unknown error has occurred in the Tree class function getIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72                 exit(1);
73         }               
74 }
75 /*****************************************************************/
76
77 void Tree::setIndex(string searchName, int index) {
78         try {
79                 //set index in treemap
80                 globaldata->gTreemap->setIndex(searchName, index);
81         }
82         catch(exception& e) {
83                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
84                 exit(1);
85         }
86         catch(...) {
87                 cout << "An unknown error has occurred in the Tree class function setIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
88                 exit(1);
89         }               
90 }
91 /*****************************************************************/
92 void Tree::assembleTree() {
93         try {
94                 //build the pGroups in non leaf nodes to be used in the parsimony calcs.
95                 for (int i = numLeaves; i < numNodes; i++) {
96                         tree[i].pGroups = (mergeGroups(i));
97                         tree[i].pcount = (mergeGcounts(i));
98                 }
99         }
100         catch(exception& e) {
101                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
102                 exit(1);
103         }
104         catch(...) {
105                 cout << "An unknown error has occurred in the Tree class function assembleTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
106                 exit(1);
107         }               
108 }
109 /*****************************************************************/
110 void Tree::getCopy(Tree* copy) {
111         try {
112         
113                 //for each node in the tree copy its info
114                 for (int i = 0; i < numNodes; i++) {
115                         //copy name
116                         tree[i].setName(copy->tree[i].getName());
117                 
118                         //copy group
119                         tree[i].setGroup(copy->tree[i].getGroup());
120                         
121                         //copy branch length
122                         tree[i].setBranchLength(copy->tree[i].getBranchLength());
123                 
124                         //copy parent
125                         tree[i].setParent(copy->tree[i].getParent());
126                 
127                         //copy children
128                         tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
129                 
130                         //copy index in node and tmap
131                         tree[i].setIndex(copy->tree[i].getIndex());
132                         setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
133                         
134                         //copy pGroups
135                         tree[i].pGroups = copy->tree[i].pGroups;
136                 
137                         //copy pcount
138                         tree[i].pcount = copy->tree[i].pcount;
139                 }
140         }
141         catch(exception& e) {
142                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
143                 exit(1);
144         }
145         catch(...) {
146                 cout << "An unknown error has occurred in the Tree class function getCopy. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
147                 exit(1);
148         }               
149 }
150 /*****************************************************************/
151 //returns a map with a groupname and the number of times that group was seen in the children
152 //for instance if your children are white and black then it would return a map with 2 entries
153 // p[white] = 1 and p[black] = 1.  Now go up a level and merge that with a node who has p[white] = 1
154 //and you get p[white] = 2, p[black] = 1, but you erase the p[black] because you have a p value higher than 1.
155
156 map<string, int> Tree::mergeGroups(int i) {
157         try {
158         
159                 int lc = tree[i].getLChild();
160                 int rc = tree[i].getRChild();
161                 
162                 //set parsimony groups to left child
163                 map<string,int> parsimony = tree[lc].pGroups;
164                 
165                 int maxPars = 1;
166
167                 //look at right child groups and update maxPars if right child has something higher for that group.
168                 for(it=tree[rc].pGroups.begin();it!=tree[rc].pGroups.end();it++){
169                         parsimony[it->first]++;
170                         
171                         if(parsimony[it->first] > maxPars){
172                                 maxPars = parsimony[it->first];
173                         }
174                 }
175         
176                 // this is true if right child had a greater parsimony for a certain group
177                 if(maxPars > 1){
178                         //erase all the groups that are only 1 because you found something with 2.
179                         for(it=parsimony.begin();it!=parsimony.end();it++){
180                                 if(it->second == 1){
181                                         parsimony.erase(it->first);
182                                         it--;
183                                 }
184                         }
185                         //set one remaining groups to 1
186                         //so with our above example p[white] = 2 would be left and it would become p[white] = 1
187                         for(it=parsimony.begin();it!=parsimony.end();it++){
188                                 parsimony[it->first] = 1;
189                         }
190                 
191                 }
192         
193                 return parsimony;
194         }
195         catch(exception& e) {
196                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
197                 exit(1);
198         }
199         catch(...) {
200                 cout << "An unknown error has occurred in the Tree class function mergeGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
201                 exit(1);
202         }               
203 }
204
205 /**************************************************************************************************/
206
207 map<string,int> Tree::mergeGcounts(int position) {
208         try{
209                 map<string,int>::iterator pos;
210         
211                 int lc = tree[position].getLChild();
212                 int rc = tree[position].getRChild();
213         
214                 map<string,int> sum = tree[lc].pcount;
215     
216                 for(it=tree[rc].pcount.begin();it!=tree[rc].pcount.end();it++){
217                         sum[it->first] += it->second;
218                 }
219                 return sum;
220         }
221         catch(exception& e) {
222                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
223                 exit(1);
224         }
225         catch(...) {
226                 cout << "An unknown error has occurred in the Tree class function mergeGcounts. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
227                 exit(1);
228         }               
229 }
230 /**************************************************************************************************/
231
232 void Tree::randomLabels() {
233         try {
234                 for(int i=numLeaves-1;i>=0;i--){
235                         if(tree[i].pGroups.size() == 0){
236                                 continue;
237                         }
238                 
239                         int escape = 1;
240                         int z;
241                 
242                         while(escape == 1){
243                                 z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));        
244                         
245                                 if(tree[z].pGroups.size() != 0){
246                                         escape = 0;
247                                 }
248                         }
249                 
250                 
251                         map<string,int> lib_hold = tree[z].pGroups;
252                         tree[z].pGroups = (tree[i].pGroups);
253                         tree[i].pGroups = (lib_hold);
254                 
255                         tree[z].setGroup(tree[z].pGroups.begin()->first);
256                         tree[i].setGroup(tree[i].pGroups.begin()->first);
257                 
258                         map<string,int> gcount_hold = tree[z].pcount;
259                         tree[z].pcount = (tree[i].pcount);
260                         tree[i].pcount = (gcount_hold);
261                 }
262         }
263         catch(exception& e) {
264                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
265                 exit(1);
266         }
267         catch(...) {
268                 cout << "An unknown error has occurred in the Tree class function randomLabels. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
269                 exit(1);
270         }               
271 }
272 /**************************************************************************************************/
273
274 void Tree::randomBlengths()  {
275         try {
276                 for(int i=numNodes-1;i>=0;i--){
277                         int z = int((float)(i+1) * (float)(rand()) / ((float)RAND_MAX+1.0));    
278                 
279                         float bl_hold = tree[z].getBranchLength();
280                         tree[z].setBranchLength(tree[i].getBranchLength());
281                         tree[i].setBranchLength(bl_hold);
282                 }
283         }
284         catch(exception& e) {
285                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
286                 exit(1);
287         }
288         catch(...) {
289                 cout << "An unknown error has occurred in the Tree class function randomBlengths. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
290                 exit(1);
291         }               
292 }
293 /*************************************************************************************************/
294 void Tree::assembleRandomUnifracTree() {
295         randomLabels();
296         assembleTree();
297 }
298
299 /*************************************************************************************************/
300 //for now it's just random topology but may become random labels as well later that why this is such a simple function now...
301 void Tree::assembleRandomTree() {
302         randomTopology();
303         assembleTree();
304 }
305 /**************************************************************************************************/
306
307 void Tree::randomTopology() {
308         try {
309                 for(int i=0;i<numNodes;i++){
310                         tree[i].setParent(-1);
311                 }
312                 for(int i=numLeaves;i<numNodes;i++){
313                         tree[i].setChildren(-1, -1); 
314                 }
315     
316                 for(int i=numLeaves;i<numNodes;i++){
317                         int escape =0;
318                         int rnd_index1, rnd_index2;
319                         while(escape == 0){
320                                 rnd_index1 = (int)(((double)rand() / (double) RAND_MAX)*i);
321                                 if(tree[rnd_index1].getParent() == -1){escape = 1;}
322                         }
323                 
324                         escape = 0;
325                         while(escape == 0){
326                                 rnd_index2 = (int)(((double)rand() / (double) RAND_MAX)*i);
327                                 if(rnd_index2 != rnd_index1 && tree[rnd_index2].getParent() == -1){
328                                         escape = 1;
329                                 }               
330                         }
331                 
332                         tree[i].setChildren(rnd_index1,rnd_index2);
333                         tree[i].setParent(-1);
334                         tree[rnd_index1].setParent(i);
335                         tree[rnd_index2].setParent(i);
336                 }
337         }
338         catch(exception& e) {
339                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
340                 exit(1);
341         }
342         catch(...) {
343                 cout << "An unknown error has occurred in the Tree class function randomTopology. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
344                 exit(1);
345         }               
346 }
347
348 /*****************************************************************/
349 // This prints out the tree in Newick form.
350 void Tree::createNewickFile() {
351         try {
352                 int root = findRoot();
353                 filename = getRootName(globaldata->getTreeFile()) + "newick";
354                 openOutputFile(filename, out);
355                 
356                 printBranch(root);
357                 
358                 // you are at the end of the tree
359                 out << ";" << endl;
360         }
361         catch(exception& e) {
362                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
363                 exit(1);
364         }
365         catch(...) {
366                 cout << "An unknown error has occurred in the Tree class function createNewickFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
367                 exit(1);
368         }               
369 }
370
371 /*****************************************************************/
372 //This function finds the index of the root node.
373
374 int Tree::findRoot() {
375         try {
376                 for (int i = 0; i < numNodes; i++) {
377                         //you found the root
378                         if (tree[i].getParent() == -1) { return i; }
379                 }
380                 return -1;
381         }
382         catch(exception& e) {
383                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
384                 exit(1);
385         }
386         catch(...) {
387                 cout << "An unknown error has occurred in the Tree class function findRoot. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
388                 exit(1);
389         }               
390 }
391
392 /*****************************************************************/
393 void Tree::printBranch(int node) {
394         try {
395                 
396                 // you are not a leaf
397                 if (tree[node].getLChild() != -1) {
398                         out << "(";
399                         printBranch(tree[node].getLChild());
400                         out << ",";
401                         printBranch(tree[node].getRChild());
402                         out << ")";
403                 }else { //you are a leaf
404                         tree[node].printNode(out);  //prints out name and branch length
405                 }
406                 
407         }
408         catch(exception& e) {
409                 cout << "Standard Error: " << e.what() << " has occurred in the Tree class Function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
410                 exit(1);
411         }
412         catch(...) {
413                 cout << "An unknown error has occurred in the Tree class function printBranch. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
414                 exit(1);
415         }               
416 }
417
418 /*****************************************************************/
419
420
421