]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 m = MothurOut::getInstance();
16         }
17         catch(exception& e) {
18                 m->errorOut(e, "ReadTree", "ReadTree");
19                 exit(1);
20         }
21 }
22 /***********************************************************************/
23 int ReadTree::AssembleTrees() {
24          try {
25                  //assemble users trees
26                  for (int i = 0; i < Trees.size(); i++) {
27                          if (m->control_pressed) { return 0;  }
28                          Trees[i]->assembleTree();
29                  }
30                  return 0;
31          }
32         catch(exception& e) {
33                 m->errorOut(e, "ReadTree", "AssembleTrees");
34                 exit(1);
35         }
36 }
37 /***********************************************************************/
38 int ReadTree::readSpecialChar(istream& f, char c, string name) {
39     try {
40         
41                 m->gobble(f);
42                 char d = f.get();
43         
44                 if(d == EOF){
45                         m->mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
46                         exit(1);
47                 }
48                 if(d != c){
49                         m->mothurOut("Error: Expected " + name + " in input file.  Found " + toString(d) + ".\n");
50                         exit(1);
51                 }
52                 if(d == ')' && f.peek() == '\n'){
53                         m->gobble(f);
54                 }       
55                 return d;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "ReadTree", "readSpecialChar");
59                 exit(1);
60         }
61 }
62 /**************************************************************************************************/
63
64 int ReadTree::readNodeChar(istream& f) {
65         try {
66 //              while(isspace(d=f.get()))               {;}
67                 m->gobble(f);
68                 char d = f.get();
69
70                 if(d == EOF){
71                         m->mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
72                         exit(1);
73                 }
74                 return d;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "ReadTree", "readNodeChar");
78                 exit(1);
79         }
80 }
81
82 /**************************************************************************************************/
83
84 float ReadTree::readBranchLength(istream& f) {
85     try {
86                 float b;
87         
88                 if(!(f >> b)){
89                         m->mothurOut("Error: Missing branch length in input tree.\n");
90                         exit(1);
91                 }
92                 m->gobble(f);
93                 return b;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ReadTree", "readBranchLength");
97                 exit(1);
98         }
99 }
100
101 /***********************************************************************/
102 /***********************************************************************/
103
104 //Child Classes Below
105
106 /***********************************************************************/
107 /***********************************************************************/
108 //This class reads a file in Newick form and stores it in a tree.
109
110 int ReadNewickTree::read(CountTable* ct) {
111         try {
112                 holder = "";
113                 int c, error;
114                 int comment = 0;
115                 
116                 //if you are not a nexus file 
117                 if ((c = filehandle.peek()) != '#') {  
118                         while((c = filehandle.peek()) != EOF) {
119                 if (m->control_pressed) {  filehandle.close(); return 0; }
120                                 while ((c = filehandle.peek()) != EOF) {
121                     if (m->control_pressed) {  filehandle.close(); return 0; }
122                                         // get past comments
123                                         if(c == '[') {
124                                                 comment = 1;
125                                         }
126                                         if(c == ']'){
127                                                 comment = 0;
128                                         }
129                                         if((c == '(') && (comment != 1)){ break; }
130                                         filehandle.get();
131                                 }
132
133                                 //make new tree
134                                 T = new Tree(ct); 
135
136                                 numNodes = T->getNumNodes();
137                                 numLeaves = T->getNumLeaves();
138                                 
139                                 error = readTreeString(ct); 
140                                 
141                                 //save trees for later commands
142                                 Trees.push_back(T); 
143                                 m->gobble(filehandle);
144                         }
145                 //if you are a nexus file
146                 }else if ((c = filehandle.peek()) == '#') {
147                         //get right number of seqs from nexus file.
148                         Tree* temp = new Tree(ct);  delete temp;
149                         
150                         nexusTranslation(ct);  //reads file through the translation and updates treemap
151                         while((c = filehandle.peek()) != EOF) {
152                 if (m->control_pressed) {  filehandle.close(); return 0; }
153                                 // get past comments
154                                 while ((c = filehandle.peek()) != EOF) {
155                     if (m->control_pressed) {  filehandle.close(); return 0; }
156                                         if(holder == "[" || holder == "[!"){
157                                                 comment = 1;
158                                         }
159                                         if(holder == "]"){
160                                                 comment = 0;
161                                         }
162                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
163                                         filehandle >> holder;
164                                 }
165                         
166                                 //pass over the "tree rep.6878900 = "
167                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
168                                         
169                                 if (c == EOF ) { break; }
170                                 filehandle.putback(c);  //put back first ( of tree.
171                                 
172                                 //make new tree
173                                 T = new Tree(ct); 
174                                 numNodes = T->getNumNodes();
175                                 numLeaves = T->getNumLeaves();
176                                 
177                                 //read tree info
178                                 error = readTreeString(ct); 
179                                  
180                                 //save trees for later commands
181                                 Trees.push_back(T); 
182                         }
183                 }
184                 
185                 if (error != 0) { readOk = error; } 
186                 
187                 filehandle.close();
188
189                 return readOk;
190         }
191         catch(exception& e) {
192                 m->errorOut(e, "ReadNewickTree", "read");
193                 exit(1);
194         }
195 }
196 /**************************************************************************************************/
197 //This function reads the file through the translation of the sequence names and updates the count table.
198 string ReadNewickTree::nexusTranslation(CountTable* ct) {
199         try {
200                 
201                 holder = "";
202                 int numSeqs = m->Treenames.size(); //must save this some when we clear old names we can still know how many sequences there were
203                 int comment = 0;
204                 
205                 // get past comments
206                 while(holder != "translate" && holder != "Translate"){  
207                         if(holder == "[" || holder == "[!"){
208                                 comment = 1;
209                         }
210                         if(holder == "]"){
211                                 comment = 0;
212                         }
213                         filehandle >> holder; 
214                         if(holder == "tree" && comment != 1){return holder;}
215                 }
216     
217                 string number, name;
218                 for(int i=0;i<numSeqs;i++){
219                         
220                         filehandle >> number;
221                         filehandle >> name;
222                         name.erase(name.end()-1);  //erase the comma
223                         ct->renameSeq(name, toString(number));
224                 }
225                 
226                 return name;
227         }
228         catch(exception& e) {
229                 m->errorOut(e, "ReadNewickTree", "nexusTranslation");
230                 exit(1);
231         }
232 }
233
234 /**************************************************************************************************/
235 int ReadNewickTree::readTreeString(CountTable* ct) {
236         try {
237                 
238                 int n = 0;
239                 int lc, rc; 
240                 
241                 int rooted = 0;
242         
243                 int ch = filehandle.peek();     
244                 
245                 if(ch == '('){
246                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
247
248                         lc = readNewickInt(filehandle, n, T, ct);
249                         if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); m->control_pressed = true; return -1; } //reports an error in reading
250         
251                         if(filehandle.peek()==','){                                                     
252                                 readSpecialChar(filehandle,',',"comma");
253                         }
254                         // ';' means end of tree.                                                                                               
255                         else if((ch=filehandle.peek())==';' || ch=='['){                
256                                 rooted = 1;                                                                     
257                         }       
258                 
259                         if(rooted != 1){                                                                
260                                 rc = readNewickInt(filehandle, n, T, ct);
261                                 if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); m->control_pressed = true; return -1; } //reports an error in reading
262                                 if(filehandle.peek() == ')'){                                   
263                                         readSpecialChar(filehandle,')',"right parenthesis");
264                                 }                                                                                       
265                         }       
266                 }
267                 //note: treeclimber had the code below added - not sure why?
268                 else{
269                         filehandle.putback(ch);
270                         char name[MAX_LINE];
271                         filehandle.get(name, MAX_LINE,'\n');
272                         SKIPLINE(filehandle, ch);
273                 
274                         n = T->getIndex(name);
275
276                         if(n!=0){
277                                 m->mothurOut("Internal error: The only taxon is not taxon 0.\n");
278                                 //exit(1);
279                                 readOk = -1; return -1;
280                         }
281                         lc = rc = -1;
282                 } 
283                 
284                 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}    
285                                                         
286                 if(rooted != 1){                                                                        
287                         T->tree[n].setChildren(lc,rc);
288                         T->tree[n].setBranchLength(0);
289                         T->tree[n].setParent(-1);
290                         if(lc!=-1){             T->tree[lc].setParent(n);               }
291                         if(rc!=-1){             T->tree[rc].setParent(n);               }
292                 }
293                 
294                 //T->printTree(); cout << endl;
295                 return 0;
296         
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "ReadNewickTree", "readTreeString");
300                 exit(1);
301         }
302 }
303 /**************************************************************************************************/
304
305 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, CountTable* ct) {
306         try {
307                 
308                 if (m->control_pressed) { return -1; } 
309                 
310                 int c = readNodeChar(f);
311
312                 if(c == '('){
313                 
314                         //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
315                         //read all children
316                         vector<int> childrenNodes;
317                         while(f.peek() != ')'){
318                                 int child = readNewickInt(f, n, T, ct);
319                                 if (child == -1) { return -1; } //reports an error in reading
320                 //cout << "child = " << child << endl;          
321                                 childrenNodes.push_back(child);
322                                 
323                                 //after a child you either have , or ), check for both
324                                 if(f.peek()==')'){  break;  }
325                                 else if (f.peek()==',') {   readSpecialChar(f,',',"comma");  }
326                                 else {;}
327                         }
328         //cout << childrenNodes.size() << endl;         
329                         if (childrenNodes.size() < 2) {  m->mothurOut("Error in tree, please correct."); m->mothurOutEndLine(); return -1; }
330                         
331                         //then force into 2 node structure
332                         for (int i = 1; i < childrenNodes.size(); i++) {
333                         
334                                 int lc, rc;
335                                 if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; }
336                                 else { lc = n-1; rc = childrenNodes[i]; }
337                         //cout << i << '\t' << lc << '\t' << rc << endl;        
338                                 T->tree[n].setChildren(lc,rc);
339                                 T->tree[lc].setParent(n);
340                                 T->tree[rc].setParent(n);
341                                 
342                                 //T->printTree(); cout << endl;
343                                 n++;
344                         }
345                         
346                         //to account for extra ++ in looping
347                         n--;
348                         
349                         if(f.peek()==')'){      
350                                 readSpecialChar(f,')',"right parenthesis");     
351                                 //to pass over labels in trees
352                                 c=filehandle.get();
353                                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')&& (c!=')')){ c=filehandle.get(); }
354                                 filehandle.putback(c);
355                         }                       
356                 
357                         if(f.peek() == ':'){                                                                          
358                                 readSpecialChar(f,':',"colon"); 
359                                                                                 
360                                 if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
361                                 
362                                 T->tree[n].setBranchLength(readBranchLength(f));
363                         }else{
364                                 T->tree[n].setBranchLength(0.0); 
365                         }                                               
366                                                 
367                         return n++;
368                 
369                 }else{
370                         f.putback(c);
371                         string name = "";
372                         char d=f.get();
373                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
374                                 name += d;
375                                 d=f.get();
376                         }
377 //cout << name << endl;
378                         int blen = 0;
379                         if(d == ':')    {               blen = 1;       }               
380                 
381                         f.putback(d);
382                 
383                         //set group info
384                         vector<string> group = ct->getGroups(name);
385             //cout << name << endl;     
386                         //find index in tree of name
387                         int n1 = T->getIndex(name);
388                         
389                         //adds sequence names that are not in group file to the "xxx" group
390                         if(group.size() == 0) {
391                                 m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n");  //readOk = -1; return n1;
392                                 
393                 vector<string> currentGroups = ct->getNamesOfGroups();
394                 if (!m->inUsersGroups("xxx", currentGroups)) {  ct->addGroup("xxx");  }
395                 currentGroups = ct->getNamesOfGroups();
396                 vector<int> thisCounts; thisCounts.resize(currentGroups.size(), 0);
397                 for (int h = 0; h < currentGroups.size(); h++) {  
398                     if (currentGroups[h] == "xxx") {  thisCounts[h] = 1;  break; }
399                 }
400                 ct->push_back(name, thisCounts);
401                 
402                                 group.push_back("xxx");
403                         }                       
404                         T->tree[n1].setGroup(group);
405                         T->tree[n1].setChildren(-1,-1);
406                 
407                         if(blen == 1){  
408                                 f.get();
409                                 T->tree[n1].setBranchLength(readBranchLength(f));
410                         }else{
411                                 T->tree[n1].setBranchLength(0.0);
412                         }
413                 
414                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
415         
416                         f.putback(c);
417                 
418                         return n1;
419                 }
420         }
421         catch(exception& e) {
422                 m->errorOut(e, "ReadNewickTree", "readNewickInt");
423                 exit(1);
424         }
425 }
426 /**************************************************************************************************/
427 /**************************************************************************************************/
428