]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
fixed some bugs
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 globaldata->gTree.clear();
17         }
18         catch(exception& e) {
19                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
20                 exit(1);
21         }
22         catch(...) {
23                 cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
24                 exit(1);
25         }               
26 }
27 /***********************************************************************/
28 int ReadTree::readSpecialChar(istream& f, char c, string name) {
29     try {
30         
31                 gobble(f);
32                 char d = f.get();
33         
34                 if(d == EOF){
35                         cerr << "Error: Input file ends prematurely, expecting a " << name << "\n";
36                         exit(1);
37                 }
38                 if(d != c){
39                         cerr << "Error: Expected " << name << " in input file.  Found " << d << ".\n";
40                         exit(1);
41                 }
42                 if(d == ')' && f.peek() == '\n'){
43                         gobble(f);
44                 }       
45                 return d;
46         }
47         catch(exception& e) {
48                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
49                 exit(1);
50         }
51         catch(...) {
52                 cout << "An unknown error has occurred in the ReadTree class function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
53                 exit(1);
54         }               
55 }
56 /**************************************************************************************************/
57
58 int ReadTree::readNodeChar(istream& f) {
59         try {
60 //              while(isspace(d=f.get()))               {;}
61                 gobble(f);
62                 char d = f.get();
63
64                 if(d == EOF){
65                         cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n";
66                         exit(1);
67                 }
68                 return d;
69         }
70         catch(exception& e) {
71                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
72                 exit(1);
73         }
74         catch(...) {
75                 cout << "An unknown error has occurred in the ReadTree class function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
76                 exit(1);
77         }               
78 }
79
80 /**************************************************************************************************/
81
82 float ReadTree::readBranchLength(istream& f) {
83     try {
84                 float b;
85         
86                 if(!(f >> b)){
87                         cerr << "Error: Missing branch length in input tree.\n";
88                         exit(1);
89                 }
90                 gobble(f);
91                 return b;
92         }
93         catch(exception& e) {
94                 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
95                 exit(1);
96         }
97         catch(...) {
98                 cout << "An unknown error has occurred in the ReadTree class function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
99                 exit(1);
100         }               
101 }
102
103 /***********************************************************************/
104 /***********************************************************************/
105
106 //Child Classes Below
107
108 /***********************************************************************/
109 /***********************************************************************/
110 //This class reads a file in Newick form and stores it in a tree.
111
112 int ReadNewickTree::read() {
113         try {
114                 holder = "";
115                 int c, error;
116                 int comment = 0;
117                 
118                 //if you are not a nexus file 
119                 if ((c = filehandle.peek()) != '#') {  
120                         while((c = filehandle.peek()) != EOF) { 
121                                 while ((c = filehandle.peek()) != EOF) {
122                                         // get past comments
123                                         if(c == '[') {
124                                                 comment = 1;
125                                         }
126                                         if(c == ']'){
127                                                 comment = 0;
128                                         }
129                                         if((c == '(') && (comment != 1)){ break; }
130                                         filehandle.get();
131                                 }
132
133                                 //make new tree
134                                 T = new Tree(); 
135                                 numNodes = T->getNumNodes();
136                                 numLeaves = T->getNumLeaves();
137                                 
138                                 error = readTreeString(); 
139                                 
140                                 //save trees for later commands
141                                 globaldata->gTree.push_back(T); 
142                                 gobble(filehandle);
143                         }
144                 //if you are a nexus file
145                 }else if ((c = filehandle.peek()) == '#') {
146                         nexusTranslation();  //reads file through the translation and updates treemap
147                         while((c = filehandle.peek()) != EOF) { 
148                                 // get past comments
149                                 while ((c = filehandle.peek()) != EOF) {        
150                                         if(holder == "[" || holder == "[!"){
151                                                 comment = 1;
152                                         }
153                                         if(holder == "]"){
154                                                 comment = 0;
155                                         }
156                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
157                                         filehandle >> holder;
158                                 }
159                         
160                                 //pass over the "tree rep.6878900 = "
161                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
162                                         
163                                 if (c == EOF ) { break; }
164                                 filehandle.putback(c);  //put back first ( of tree.
165                                 
166                                 //make new tree
167                                 T = new Tree(); 
168                                 numNodes = T->getNumNodes();
169                                 numLeaves = T->getNumLeaves();
170                                 
171                                 //read tree info
172                                 error = readTreeString(); 
173                                  
174                                 //save trees for later commands
175                                 globaldata->gTree.push_back(T); 
176                         }
177                 }
178                 
179                 if (error != 0) { readOk = error; } 
180                 
181                 filehandle.close();
182                 return readOk;
183         }
184         catch(exception& e) {
185                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
186                 exit(1);
187         }
188         catch(...) {
189                 cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
190                 exit(1);
191         }               
192 }
193 /**************************************************************************************************/
194 //This function read the file through the translation of the sequences names and updates treemap.
195 void ReadNewickTree::nexusTranslation() {
196         try {
197                 
198                 holder = "";
199                 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
200                 int comment = 0;
201                 
202                 // get past comments
203                 while(holder != "translate" && holder != "Translate"){  
204                         if(holder == "[" || holder == "[!"){
205                                 comment = 1;
206                         }
207                         if(holder == "]"){
208                                 comment = 0;
209                         }
210                         filehandle >> holder; 
211                         if(holder == "tree" && comment != 1){return;}
212                 }
213                 
214                 //update treemap
215                 globaldata->gTreemap->namesOfSeqs.clear();
216                 for(int i=0;i<numSeqs;i++){
217                         string number, name;
218                         filehandle >> number;
219                         filehandle >> name;
220                         name.erase(name.end()-1);  //erase the comma
221                         //insert new one with new name
222                         globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
223                         globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
224                         //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
225                         globaldata->gTreemap->treemap.erase(name);
226                         globaldata->gTreemap->namesOfSeqs.push_back(number);
227                 }
228         }
229         catch(exception& e) {
230                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
231                 exit(1);
232         }
233         catch(...) {
234                 cout << "An unknown error has occurred in the ReadNewickTree class function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
235                 exit(1);
236         }               
237 }
238
239 /**************************************************************************************************/
240 int ReadNewickTree::readTreeString() {
241         try {
242                 
243                 int n = 0;
244                 int lc, rc; 
245                 
246                 int rooted = 0;
247         
248                 int ch = filehandle.peek();     
249                 
250                 if(ch == '('){
251                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
252
253                         lc = readNewickInt(filehandle, n, T);
254                         if (lc == -1) { cout << "error with lc" << endl; return -1; } //reports an error in reading
255                 
256                         if(filehandle.peek()==','){                                                     
257                                 readSpecialChar(filehandle,',',"comma");
258                         }
259                         // ';' means end of tree.                                                                                               
260                         else if((ch=filehandle.peek())==';' || ch=='['){                
261                                 rooted = 1;                                                                     
262                         }                                                                                               
263                         if(rooted != 1){                                                                
264                                 rc = readNewickInt(filehandle, n, T);
265                                 if (rc == -1) { cout << "error with rc" << endl; return -1; } //reports an error in reading
266                                 if(filehandle.peek() == ')'){                                   
267                                         readSpecialChar(filehandle,')',"right parenthesis");
268                                 }                                                                                       
269                         }                                                                                               
270                 }
271                 //note: treeclimber had the code below added - not sure why?
272                 else{
273                         filehandle.putback(ch);
274                         char name[MAX_LINE];
275                         filehandle.get(name, MAX_LINE,'\n');
276                         SKIPLINE(filehandle, ch);
277                 
278                         n = T->getIndex(name);
279
280                         if(n!=0){
281                                 cerr << "Internal error: The only taxon is not taxon 0.\n";
282                                 //exit(1);
283                                 readOk = -1; return -1;
284                         }
285                         lc = rc = -1;
286                 } 
287                 
288                 while((ch=filehandle.get())!=';'){;}                                            
289                 if(rooted != 1){                                                                        
290                         T->tree[n].setChildren(lc,rc);
291                         T->tree[n].setBranchLength(0);
292                         T->tree[n].setParent(-1);
293                         if(lc!=-1){             T->tree[lc].setParent(n);               }
294                         if(rc!=-1){             T->tree[rc].setParent(n);               }
295                 }
296                 return 0;
297         
298         }
299         catch(exception& e) {
300                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
301                 exit(1);
302         }
303         catch(...) {
304                 cout << "An unknown error has occurred in the ReadNewickTree class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
305                 exit(1);
306         }               
307
308 }
309 /**************************************************************************************************/
310
311 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
312         try {
313                 int c = readNodeChar(f);
314     
315                 if(c == '('){
316                         int lc = readNewickInt(f, n, T);
317                         if (lc == -1) { return -1; } //reports an error in reading
318                         readSpecialChar(f,',',"comma");
319
320                         int rc = readNewickInt(f, n, T);
321                         if (rc == -1) { return -1; }  //reports an error in reading     
322                         if(f.peek()==')'){      
323                                 readSpecialChar(f,')',"right parenthesis");     
324                         }                       
325                 
326                         if(f.peek() == ':'){                                                                          
327                                 readSpecialChar(f,':',"colon"); 
328                                                                                 
329                                 if(n >= numNodes){      cerr << "Error: Too many nodes in input tree\n";  readOk = -1; return -1; }
330                                 
331                                 T->tree[n].setBranchLength(readBranchLength(f));
332                         }else{T->tree[n].setBranchLength(0.0); }                                                
333                 
334                         T->tree[n].setChildren(lc,rc);
335                         T->tree[lc].setParent(n);
336                         T->tree[rc].setParent(n);
337                 
338                         return n++;
339                 }else{
340                         f.putback(c);
341                         string name = "";
342                         char d=f.get();
343                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
344                                 name += d;
345                                 d=f.get();
346                         }
347                 
348                         int blen = 0;
349                         if(d == ':')    {               blen = 1;       }               
350                 
351                         f.putback(d);
352                 
353                         //set group info
354                         string group = globaldata->gTreemap->getGroup(name);
355                         
356                         //find index in tree of name
357                         int n1 = T->getIndex(name);
358                         
359                         //adds sequence names that are not in group file to the "xxx" group
360                         if(group == "not found") {
361                                 cout << "Name: " << name << " is not in your groupfile, and will be disregarded. \n";  //readOk = -1; return n1;
362                                 
363                                 globaldata->gTreemap->namesOfSeqs.push_back(name);
364                                 globaldata->gTreemap->treemap[name].groupname = "xxx";
365                                 
366                                 map<string, int>::iterator it;
367                                 it = globaldata->gTreemap->seqsPerGroup.find("xxx");
368                                 if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
369                                         globaldata->gTreemap->namesOfGroups.push_back("xxx");
370                                         globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
371                                 }else {
372                                         globaldata->gTreemap->seqsPerGroup["xxx"]++;
373                                 }
374                                 
375                                 group = "xxx";
376                         }
377                         
378                         T->tree[n1].setGroup(group);
379                         T->tree[n1].setChildren(-1,-1);
380                 
381                         if(blen == 1){  
382                                 f.get();
383                                 T->tree[n1].setBranchLength(readBranchLength(f));
384                         }else{
385                                 T->tree[n1].setBranchLength(0.0);
386                         }
387                 
388                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
389                         f.putback(c);
390                 
391                         return n1;
392                 }
393         }
394         catch(exception& e) {
395                 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
396                 exit(1);
397         }
398         catch(...) {
399                 cout << "An unknown error has occurred in the ReadNewickTree class function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
400                 exit(1);
401         }               
402 }
403 /**************************************************************************************************/
404 /**************************************************************************************************/
405