]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
fixed bug in read.tree
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 globaldata->gTree.clear();
17         }
18         catch(exception& e) {
19                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
20                 exit(1);
21         }
22         catch(...) {
23                 cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
24                 exit(1);
25         }               
26 }
27 /***********************************************************************/
28 int ReadTree::readSpecialChar(istream& f, char c, string name) {
29     try {
30         
31                 gobble(f);
32                 char d = f.get();
33         
34                 if(d == EOF){
35                         cerr << "Error: Input file ends prematurely, expecting a " << name << "\n";
36                         exit(1);
37                 }
38                 if(d != c){
39                         cerr << "Error: Expected " << name << " in input file.  Found " << d << ".\n";
40                         exit(1);
41                 }
42                 if(d == ')' && f.peek() == '\n'){
43                         gobble(f);
44                 }       
45                 return d;
46         }
47         catch(exception& e) {
48                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51         catch(...) {
52                 cout << "An unknown error has occurred in the ReadTree class function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53                 exit(1);
54         }               
55 }
56 /**************************************************************************************************/
57
58 int ReadTree::readNodeChar(istream& f) {
59         try {
60 //              while(isspace(d=f.get()))               {;}
61                 gobble(f);
62                 char d = f.get();
63
64                 if(d == EOF){
65                         cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n";
66                         exit(1);
67                 }
68                 return d;
69         }
70         catch(exception& e) {
71                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72                 exit(1);
73         }
74         catch(...) {
75                 cout << "An unknown error has occurred in the ReadTree class function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
76                 exit(1);
77         }               
78 }
79
80 /**************************************************************************************************/
81
82 float ReadTree::readBranchLength(istream& f) {
83     try {
84                 float b;
85         
86                 if(!(f >> b)){
87                         cerr << "Error: Missing branch length in input tree.\n";
88                         exit(1);
89                 }
90                 gobble(f);
91                 return b;
92         }
93         catch(exception& e) {
94                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
95                 exit(1);
96         }
97         catch(...) {
98                 cout << "An unknown error has occurred in the ReadTree class function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
99                 exit(1);
100         }               
101 }
102
103
104 /***********************************************************************/
105 /***********************************************************************/
106
107
108 //Child Classes Below
109
110 /***********************************************************************/
111 /***********************************************************************/
112 //This class reads a file in Newick form and stores it in a tree.
113
114 void ReadNewickTree::read() {
115         try {
116                 int c;
117                 int comment = 0;
118                 
119                 //if you are not a nexus file 
120                 if ((c = filehandle.peek()) != '#') {  
121                         while((c = filehandle.peek()) != EOF) { 
122                                 //make new tree
123                                 T = new Tree(); 
124                                 numNodes = T->getNumNodes();
125                                 numLeaves = T->getNumLeaves();
126                                 
127                                 readTreeString();  
128                                 //save trees for later commands
129                                 globaldata->gTree.push_back(T); 
130                                 gobble(filehandle);
131                         }
132                 //if you are a nexus file
133                 }else if ((c = filehandle.peek()) == '#') {
134                         nexusTranslation();  //reads file through the translation and updates treemap
135                         while((c = filehandle.peek()) != EOF) { 
136                                 // get past comments
137                                 while ((c = filehandle.peek()) != EOF) {        
138                                         if(holder == "[" || holder == "[!"){
139                                                 comment = 1;
140                                         }
141                                         if(holder == "]"){
142                                                 comment = 0;
143                                         }
144                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
145                                         filehandle >> holder;
146                                 }
147                         
148                                 //pass over the "tree rep.6878900 = "
149                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
150                                         
151                                 if (c == EOF ) { break; }
152                                 filehandle.putback(c);  //put back first ( of tree.
153                                 
154                                 //make new tree
155                                 T = new Tree(); 
156                                 numNodes = T->getNumNodes();
157                                 numLeaves = T->getNumLeaves();
158                                 
159                                 //read tree info
160                                 readTreeString();  
161                                 //save trees for later commands
162                                 globaldata->gTree.push_back(T); 
163                         }
164                 }
165                 
166         }
167         catch(exception& e) {
168                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
169                 exit(1);
170         }
171         catch(...) {
172                 cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
173                 exit(1);
174         }               
175 }
176 /**************************************************************************************************/
177 //This function read the file through the translation of the sequences names and updates treemap.
178 void ReadNewickTree::nexusTranslation() {
179         try {
180                 
181                 holder = "";
182                 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
183                 int comment = 0;
184                 
185                 // get past comments
186                 while(holder != "translate" && holder != "Translate"){  
187                         if(holder == "[" || holder == "[!"){
188                                 comment = 1;
189                         }
190                         if(holder == "]"){
191                                 comment = 0;
192                         }
193                         filehandle >> holder; 
194                         if(holder == "tree" && comment != 1){return;}
195                 }
196                 
197                 //update treemap
198                 globaldata->gTreemap->namesOfSeqs.clear();
199                 for(int i=0;i<numSeqs;i++){
200                         string number, name;
201                         filehandle >> number;
202                         filehandle >> name;
203                         name.erase(name.end()-1);  //erase the comma
204                         //insert new one with new name
205                         globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
206                         globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
207                         //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
208                         globaldata->gTreemap->treemap.erase(name);
209                         globaldata->gTreemap->namesOfSeqs.push_back(number);
210                 }
211         }
212         catch(exception& e) {
213                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
214                 exit(1);
215         }
216         catch(...) {
217                 cout << "An unknown error has occurred in the ReadNewickTree class function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }               
220 }
221
222 /**************************************************************************************************/
223 void ReadNewickTree::readTreeString() {
224         try {
225                 
226                 int n = 0;
227                 int lc, rc; 
228                 
229                 int rooted = 0;
230         
231                 int ch = filehandle.peek();     
232                 
233                 if(ch == '('){
234                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
235
236                         lc = readNewickInt(filehandle, n, T);
237                 
238                         if(filehandle.peek()==','){                                                     
239                                 readSpecialChar(filehandle,',',"comma");
240                         }
241                         // ';' means end of tree.                                                                                               
242                         else if((ch=filehandle.peek())==';' || ch=='['){                
243                                 rooted = 1;                                                                     
244                         }                                                                                               
245                         if(rooted != 1){                                                                
246                                 rc = readNewickInt(filehandle, n, T);
247                                 if(filehandle.peek() == ')'){                                   
248                                         readSpecialChar(filehandle,')',"right parenthesis");
249                                 }                                                                                       
250                         }                                                                                               
251                 }
252                 //note: treeclimber had the code below added - not sure why?
253                 else{
254                         filehandle.putback(ch);
255                         char name[MAX_LINE];
256                         filehandle.get(name, MAX_LINE,'\n');
257                         SKIPLINE(filehandle, ch);
258                 
259                         n = T->getIndex(name);
260
261                         if(n!=0){
262                                 cerr << "Internal error: The only taxon is not taxon 0.\n";
263                                 exit(1);
264                         }
265                         lc = rc = -1;
266                 } 
267                 
268                 while((ch=filehandle.get())!=';'){;}                                            
269                 if(rooted != 1){                                                                        
270                         T->tree[n].setChildren(lc,rc);
271                         T->tree[n].setBranchLength(0);
272                         T->tree[n].setParent(-1);
273                         if(lc!=-1){             T->tree[lc].setParent(n);               }
274                         if(rc!=-1){             T->tree[rc].setParent(n);               }
275                 }
276         
277         }
278         catch(exception& e) {
279                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
280                 exit(1);
281         }
282         catch(...) {
283                 cout << "An unknown error has occurred in the ReadNewickTree class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
284                 exit(1);
285         }               
286
287 }
288 /**************************************************************************************************/
289
290 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
291         try {
292                 int c = readNodeChar(f);
293     
294                 if(c == '('){
295                         int lc = readNewickInt(f, n, T);
296                         readSpecialChar(f,',',"comma");
297
298                         int rc = readNewickInt(f, n, T);                
299                         if(f.peek()==')'){      
300                                 readSpecialChar(f,')',"right parenthesis");                                     
301                         }                       
302                 
303                         if(f.peek() == ':'){                                                                          
304                                 readSpecialChar(f,':',"colon");                                                 
305                                 if(n >= numNodes){      cerr << "Error: Too many nodes in input tree\n";  exit(1); }
306                                 T->tree[n].setBranchLength(readBranchLength(f));
307                         }else{T->tree[n].setBranchLength(0.0); }                                                
308                 
309                         T->tree[n].setChildren(lc,rc);
310                         T->tree[lc].setParent(n);
311                         T->tree[rc].setParent(n);
312                 
313                         return n++;
314                 }else{
315                         f.putback(c);
316                         string name = "";
317                         char d=f.get();
318                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
319                                 name += d;
320                                 d=f.get();
321                         }
322                 
323                         int blen = 0;
324                         if(d == ':')    {               blen = 1;                       }               
325                 
326                         f.putback(d);
327                 
328                         //set group info
329                         string group = globaldata->gTreemap->getGroup(name);
330                         
331                         //find index in tree of name
332                         int n1 = T->getIndex(name);
333                         
334                         if(n1 == -1){cerr << "Name: " << name << " not found\n"; exit(1);}
335                         
336                         else T->tree[n1].setGroup(group);
337                 
338                         T->tree[n1].setChildren(-1,-1);
339                 
340                         if(blen == 1){  
341                                 f.get();                
342                                 T->tree[n1].setBranchLength(readBranchLength(f));
343                         }else{
344                                 T->tree[n1].setBranchLength(0.0);
345                         }
346                 
347                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
348                         f.putback(c);
349                 
350                         return n1;
351                 }
352         }
353         catch(exception& e) {
354                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
355                 exit(1);
356         }
357         catch(...) {
358                 cout << "An unknown error has occurred in the ReadNewickTree class function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
359                 exit(1);
360         }               
361 }
362 /**************************************************************************************************/
363 /**************************************************************************************************/
364