]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
adding treeclimber and unifrac pieces
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 globaldata->gTree.clear();
17         }
18         catch(exception& e) {
19                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
20                 exit(1);
21         }
22         catch(...) {
23                 cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
24                 exit(1);
25         }               
26 }
27 /***********************************************************************/
28 int ReadTree::readSpecialChar(istream& f, char c, string name) {
29     try {
30                 char d;
31         
32                 while(isspace(d=f.get()))               {;}
33                 if(d == EOF){
34                         cerr << "Error: Input file ends prematurely, expecting a " << name << "\n";
35                         exit(1);
36                 }
37                 if(d != c){
38                         cerr << "Error: Expected " << name << " in input file.  Found " << d << ".\n";
39                         exit(1);
40                 }
41                 if(d == ')' && f.peek() == '\n'){
42                         while(isspace(d=f.get()))               {;}
43                         f.putback(d);
44                 }       
45         
46                 return d;
47         }
48         catch(exception& e) {
49                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
50                 exit(1);
51         }
52         catch(...) {
53                 cout << "An unknown error has occurred in the ReadTree class function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
54                 exit(1);
55         }               
56 }
57 /**************************************************************************************************/
58
59 int ReadTree::readNodeChar(istream& f) {
60         try {
61                 char d;
62                 while(isspace(d=f.get()))               {;}
63                 if(d == EOF){
64                         cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n";
65                         exit(1);
66                 }
67                 return d;
68         }
69         catch(exception& e) {
70                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
71                 exit(1);
72         }
73         catch(...) {
74                 cout << "An unknown error has occurred in the ReadTree class function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
75                 exit(1);
76         }               
77 }
78
79 /**************************************************************************************************/
80
81 float ReadTree::readBranchLength(istream& f) {
82     try {
83                 float b;
84         
85                 if(!(f >> b)){
86                         cerr << "Error: Missing branch length in input tree.\n";
87                         exit(1);
88                 }
89     
90                 return b;
91         }
92         catch(exception& e) {
93                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
94                 exit(1);
95         }
96         catch(...) {
97                 cout << "An unknown error has occurred in the ReadTree class function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
98                 exit(1);
99         }               
100 }
101
102
103 /***********************************************************************/
104 /***********************************************************************/
105
106
107 //Child Classes Below
108
109 /***********************************************************************/
110 /***********************************************************************/
111 //This class reads a file in Newick form and stores it in a tree.
112
113 void ReadNewickTree::read() {
114         try {
115                 int c;
116                 int comment = 0;
117                 
118                 //if you are not a nexus file 
119                 if ((c = filehandle.peek()) != '#') {  
120                         while((c = filehandle.peek()) != EOF) { 
121                                 //make new tree
122                                 T = new Tree(); 
123                                 numNodes = T->getNumNodes();
124                                 numLeaves = T->getNumLeaves();
125                                 
126                                 readTreeString();  
127                                 //save trees for later commands
128                                 globaldata->gTree.push_back(T); 
129                         }
130                 //if you are a nexus file
131                 }else if ((c = filehandle.peek()) == '#') {
132                         nexusTranslation();  //reads file through the translation and updates treemap
133                         while((c = filehandle.peek()) != EOF) { 
134                                 // get past comments
135                                 while ((c = filehandle.peek()) != EOF) {        
136                                         if(holder == "[" || holder == "[!"){
137                                                 comment = 1;
138                                         }
139                                         if(holder == "]"){
140                                                 comment = 0;
141                                         }
142                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
143                                         filehandle >> holder;
144                                 }
145                         
146                                 //pass over the "tree rep.6878900 = "
147                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
148                                         
149                                 if (c == EOF ) { break; }
150                                 filehandle.putback(c);  //put back first ( of tree.
151                                 
152                                 //make new tree
153                                 T = new Tree(); 
154                                 numNodes = T->getNumNodes();
155                                 numLeaves = T->getNumLeaves();
156                                 
157                                 //read tree info
158                                 readTreeString();  
159                                 //save trees for later commands
160                                 globaldata->gTree.push_back(T); 
161                         }
162                 }
163                 
164         }
165         catch(exception& e) {
166                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
167                 exit(1);
168         }
169         catch(...) {
170                 cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
171                 exit(1);
172         }               
173 }
174 /**************************************************************************************************/
175 //This function read the file through the translation of the sequences names and updates treemap.
176 void ReadNewickTree::nexusTranslation() {
177         try {
178                 
179                 holder = "";
180                 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
181                 int comment = 0;
182                 
183                 // get past comments
184                 while(holder != "translate" && holder != "Translate"){  
185                         if(holder == "[" || holder == "[!"){
186                                 comment = 1;
187                         }
188                         if(holder == "]"){
189                                 comment = 0;
190                         }
191                         filehandle >> holder; 
192                         if(holder == "tree" && comment != 1){return;}
193                 }
194                 
195                 //update treemap
196                 globaldata->gTreemap->namesOfSeqs.clear();
197                 for(int i=0;i<numSeqs;i++){
198                         string number, name;
199                         filehandle >> number;
200                         filehandle >> name;
201                         name.erase(name.end()-1);  //erase the comma
202                         //insert new one with new name
203                         globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
204                         globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
205                         //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
206                         globaldata->gTreemap->treemap.erase(name);
207                         globaldata->gTreemap->namesOfSeqs.push_back(number);
208                 }
209         }
210         catch(exception& e) {
211                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
212                 exit(1);
213         }
214         catch(...) {
215                 cout << "An unknown error has occurred in the ReadNewickTree class function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
216                 exit(1);
217         }               
218 }
219
220 /**************************************************************************************************/
221 void ReadNewickTree::readTreeString() {
222         try {
223                 
224                 int n = 0;
225                 int lc, rc; 
226                 
227                 int rooted = 0;
228         
229                 int ch = filehandle.peek();     
230                 
231                 if(ch == '('){
232                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
233                         lc = readNewickInt(filehandle, n, T);
234                 
235                         if(filehandle.peek()==','){                                                     
236                                 readSpecialChar(filehandle,',',"comma");
237                         }
238                         // ';' means end of tree.                                                                                               
239                         else if((ch=filehandle.peek())==';' || ch=='['){                
240                                 rooted = 1;                                                                     
241                         }                                                                                               
242                         if(rooted != 1){                                                                
243                                 rc = readNewickInt(filehandle, n, T);
244                                 if(filehandle.peek() == ')'){                                   
245                                         readSpecialChar(filehandle,')',"right parenthesis");
246                                 }                                                                                       
247                         }                                                                                               
248                 }
249                 //note: treeclimber had the code below added - not sure why?
250                 else{
251                         filehandle.putback(ch);
252                         char name[MAX_LINE];
253                         filehandle.get(name, MAX_LINE,'\n');
254                         SKIPLINE(filehandle, ch);
255                 
256                 
257                         n = T->getIndex(name);
258                         if(n!=0){
259                                 cerr << "Internal error: The only taxon is not taxon 0.\n";
260                                 exit(1);
261                         }
262                         lc = rc = -1;
263                 } 
264                 
265                 while((ch=filehandle.get())!=';'){;}                                            
266                         if(rooted != 1){                                                                        
267                         T->tree[n].setChildren(lc,rc);
268                         T->tree[n].setBranchLength(0);
269                         T->tree[n].setParent(-1);
270                         if(lc!=-1){             T->tree[lc].setParent(n);               }
271                         if(rc!=-1){             T->tree[rc].setParent(n);               }
272                 }
273         
274         }
275         catch(exception& e) {
276                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
277                 exit(1);
278         }
279         catch(...) {
280                 cout << "An unknown error has occurred in the ReadNewickTree class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
281                 exit(1);
282         }               
283
284 }
285 /**************************************************************************************************/
286
287 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
288         try {
289                 int c = readNodeChar(f);
290     
291                 if(c == '('){
292                         int lc = readNewickInt(f, n, T);
293                         readSpecialChar(f,',',"comma");
294                 
295                         int rc = readNewickInt(f, n, T);                
296                         if(f.peek()==')'){      
297                                 readSpecialChar(f,')',"right parenthesis");                                     
298                         }                       
299                 
300                         if(f.peek() == ':'){                                                                          
301                                 readSpecialChar(f,':',"colon");                                                 
302                                 if(n >= numNodes){      cerr << "Error: Too many nodes in input tree\n";  exit(1); }
303                                 T->tree[n].setBranchLength(readBranchLength(f));
304                         }else{T->tree[n].setBranchLength(0.0); }                                                
305                 
306                         T->tree[n].setChildren(lc,rc);
307                         T->tree[lc].setParent(n);
308                         T->tree[rc].setParent(n);
309                 
310                         return n++;
311                 }else{
312                         f.putback(c);
313                         string name = "";
314                         char d=f.get();
315                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
316                                 name += d;
317                                 d=f.get();
318                         }
319                 
320                         int blen = 0;
321                         if(d == ':')    {               blen = 1;                       }               
322                 
323                         f.putback(d);
324                 
325                         //set group info
326                         string group = globaldata->gTreemap->getGroup(name);
327                         
328                         //find index in tree of name
329                         int n1 = T->getIndex(name);
330                         
331                         if(n1 == -1){cerr << "Name: " << name << " not found\n"; exit(1);}
332                         
333                         else T->tree[n1].setGroup(group);
334                 
335                         T->tree[n1].setChildren(-1,-1);
336                 
337                         if(blen == 1){  
338                                 f.get();                
339                                 T->tree[n1].setBranchLength(readBranchLength(f));
340                         }else{
341                                 T->tree[n1].setBranchLength(0.0);
342                         }
343                 
344                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
345                         f.putback(c);
346                 
347                         return n1;
348                 }
349         }
350         catch(exception& e) {
351                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
352                 exit(1);
353         }
354         catch(...) {
355                 cout << "An unknown error has occurred in the ReadNewickTree class function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
356                 exit(1);
357         }               
358 }
359 /**************************************************************************************************/
360 /**************************************************************************************************/
361