]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
Revert to previous commit
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 m = MothurOut::getInstance();
16         }
17         catch(exception& e) {
18                 m->errorOut(e, "ReadTree", "ReadTree");
19                 exit(1);
20         }
21 }
22 /***********************************************************************/
23 int ReadTree::AssembleTrees(map<string, string> nameMap) {
24          try {
25                  //assemble users trees
26                  for (int i = 0; i < Trees.size(); i++) {
27                          if (m->control_pressed) { return 0;  }
28                          Trees[i]->assembleTree(nameMap);
29                  }
30                  return 0;
31          }
32         catch(exception& e) {
33                 m->errorOut(e, "ReadTree", "AssembleTrees");
34                 exit(1);
35         }
36 }
37 /***********************************************************************/
38 int ReadTree::readSpecialChar(istream& f, char c, string name) {
39     try {
40         
41                 m->gobble(f);
42                 char d = f.get();
43         
44                 if(d == EOF){
45                         m->mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
46                         exit(1);
47                 }
48                 if(d != c){
49                         m->mothurOut("Error: Expected " + name + " in input file.  Found " + toString(d) + ".\n");
50                         exit(1);
51                 }
52                 if(d == ')' && f.peek() == '\n'){
53                         m->gobble(f);
54                 }       
55                 return d;
56         }
57         catch(exception& e) {
58                 m->errorOut(e, "ReadTree", "readSpecialChar");
59                 exit(1);
60         }
61 }
62 /**************************************************************************************************/
63
64 int ReadTree::readNodeChar(istream& f) {
65         try {
66 //              while(isspace(d=f.get()))               {;}
67                 m->gobble(f);
68                 char d = f.get();
69
70                 if(d == EOF){
71                         m->mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
72                         exit(1);
73                 }
74                 return d;
75         }
76         catch(exception& e) {
77                 m->errorOut(e, "ReadTree", "readNodeChar");
78                 exit(1);
79         }
80 }
81
82 /**************************************************************************************************/
83
84 float ReadTree::readBranchLength(istream& f) {
85     try {
86                 float b;
87         
88                 if(!(f >> b)){
89                         m->mothurOut("Error: Missing branch length in input tree.\n");
90                         exit(1);
91                 }
92                 m->gobble(f);
93                 return b;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "ReadTree", "readBranchLength");
97                 exit(1);
98         }
99 }
100
101 /***********************************************************************/
102 /***********************************************************************/
103
104 //Child Classes Below
105
106 /***********************************************************************/
107 /***********************************************************************/
108 //This class reads a file in Newick form and stores it in a tree.
109
110 int ReadNewickTree::read(TreeMap* tmap) {
111         try {
112                 holder = "";
113                 int c, error;
114                 int comment = 0;
115                 
116                 //if you are not a nexus file 
117                 if ((c = filehandle.peek()) != '#') {  
118                         while((c = filehandle.peek()) != EOF) { 
119                                 while ((c = filehandle.peek()) != EOF) {
120                                         // get past comments
121                                         if(c == '[') {
122                                                 comment = 1;
123                                         }
124                                         if(c == ']'){
125                                                 comment = 0;
126                                         }
127                                         if((c == '(') && (comment != 1)){ break; }
128                                         filehandle.get();
129                                 }
130
131                                 //make new tree
132                                 T = new Tree(tmap); 
133
134                                 numNodes = T->getNumNodes();
135                                 numLeaves = T->getNumLeaves();
136                                 
137                                 error = readTreeString(tmap); 
138                                 
139                                 //save trees for later commands
140                                 Trees.push_back(T); 
141                                 m->gobble(filehandle);
142                         }
143                 //if you are a nexus file
144                 }else if ((c = filehandle.peek()) == '#') {
145                         //get right number of seqs from nexus file.
146                         Tree* temp = new Tree(tmap);  delete temp;
147                         
148                         nexusTranslation(tmap);  //reads file through the translation and updates treemap
149                         while((c = filehandle.peek()) != EOF) { 
150                                 // get past comments
151                                 while ((c = filehandle.peek()) != EOF) {        
152                                         if(holder == "[" || holder == "[!"){
153                                                 comment = 1;
154                                         }
155                                         if(holder == "]"){
156                                                 comment = 0;
157                                         }
158                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
159                                         filehandle >> holder;
160                                 }
161                         
162                                 //pass over the "tree rep.6878900 = "
163                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
164                                         
165                                 if (c == EOF ) { break; }
166                                 filehandle.putback(c);  //put back first ( of tree.
167                                 
168                                 //make new tree
169                                 T = new Tree(tmap); 
170                                 numNodes = T->getNumNodes();
171                                 numLeaves = T->getNumLeaves();
172                                 
173                                 //read tree info
174                                 error = readTreeString(tmap); 
175                                  
176                                 //save trees for later commands
177                                 Trees.push_back(T); 
178                         }
179                 }
180                 
181                 if (error != 0) { readOk = error; } 
182                 
183                 filehandle.close();
184
185                 return readOk;
186         }
187         catch(exception& e) {
188                 m->errorOut(e, "ReadNewickTree", "read");
189                 exit(1);
190         }
191 }
192 /**************************************************************************************************/
193 //This function read the file through the translation of the sequences names and updates treemap.
194 string ReadNewickTree::nexusTranslation(TreeMap* tmap) {
195         try {
196                 
197                 holder = "";
198                 int numSeqs = m->Treenames.size(); //must save this some when we clear old names we can still know how many sequences there were
199                 int comment = 0;
200                 
201                 // get past comments
202                 while(holder != "translate" && holder != "Translate"){  
203                         if(holder == "[" || holder == "[!"){
204                                 comment = 1;
205                         }
206                         if(holder == "]"){
207                                 comment = 0;
208                         }
209                         filehandle >> holder; 
210                         if(holder == "tree" && comment != 1){return holder;}
211                 }
212                 
213                 //update treemap
214                 tmap->namesOfSeqs.clear();
215                 
216                 /*char c;
217                 string number, name;
218                 while ((c = filehandle.peek()) != EOF) {        
219                         
220                         filehandle >> number; 
221                         
222                         if ((number == "tree") || (number == ";") ) {  name = number; break;  }
223                         
224                         filehandle >> name; 
225                         
226                         char lastChar;
227                         if (name.length() != 0) { lastChar = name[name.length()-1]; }
228                         
229                         if ((name == "tree") || (name == ";") ) {  break;  }
230                         
231                         if (lastChar == ',') {  name.erase(name.end()-1); } //erase the comma
232                         */      
233                 
234                 string number, name;
235                 for(int i=0;i<numSeqs;i++){
236                         
237                         filehandle >> number;
238                         filehandle >> name;
239                         name.erase(name.end()-1);  //erase the comma
240                         
241                         //insert new one with new name
242                         string group = tmap->getGroup(name);
243                         tmap->treemap[toString(number)].groupname = group;
244                         tmap->treemap[toString(number)].vectorIndex = tmap->getIndex(name);
245                         //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
246                         tmap->treemap.erase(name);
247                         tmap->namesOfSeqs.push_back(number);
248                 }
249                 
250                 return name;
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "ReadNewickTree", "nexusTranslation");
254                 exit(1);
255         }
256 }
257
258 /**************************************************************************************************/
259 int ReadNewickTree::readTreeString(TreeMap* tmap) {
260         try {
261                 
262                 int n = 0;
263                 int lc, rc; 
264                 
265                 int rooted = 0;
266         
267                 int ch = filehandle.peek();     
268                 
269                 if(ch == '('){
270                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
271
272                         lc = readNewickInt(filehandle, n, T, tmap);
273                         if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
274         
275                         if(filehandle.peek()==','){                                                     
276                                 readSpecialChar(filehandle,',',"comma");
277                         }
278                         // ';' means end of tree.                                                                                               
279                         else if((ch=filehandle.peek())==';' || ch=='['){                
280                                 rooted = 1;                                                                     
281                         }       
282                 
283                         if(rooted != 1){                                                                
284                                 rc = readNewickInt(filehandle, n, T, tmap);
285                                 if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
286                                 if(filehandle.peek() == ')'){                                   
287                                         readSpecialChar(filehandle,')',"right parenthesis");
288                                 }                                                                                       
289                         }       
290                 }
291                 //note: treeclimber had the code below added - not sure why?
292                 else{
293                         filehandle.putback(ch);
294                         char name[MAX_LINE];
295                         filehandle.get(name, MAX_LINE,'\n');
296                         SKIPLINE(filehandle, ch);
297                 
298                         n = T->getIndex(name);
299
300                         if(n!=0){
301                                 m->mothurOut("Internal error: The only taxon is not taxon 0.\n");
302                                 //exit(1);
303                                 readOk = -1; return -1;
304                         }
305                         lc = rc = -1;
306                 } 
307                 
308                 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}    
309                                                         
310                 if(rooted != 1){                                                                        
311                         T->tree[n].setChildren(lc,rc);
312                         T->tree[n].setBranchLength(0);
313                         T->tree[n].setParent(-1);
314                         if(lc!=-1){             T->tree[lc].setParent(n);               }
315                         if(rc!=-1){             T->tree[rc].setParent(n);               }
316                 }
317                 
318                 //T->printTree(); cout << endl;
319                 return 0;
320         
321         }
322         catch(exception& e) {
323                 m->errorOut(e, "ReadNewickTree", "readTreeString");
324                 exit(1);
325         }
326 }
327 /**************************************************************************************************/
328
329 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
330         try {
331                 
332                 if (m->control_pressed) { return -1; } 
333                 
334                 int c = readNodeChar(f);
335
336                 if(c == '('){
337                 
338                         //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
339                         //read all children
340                         vector<int> childrenNodes;
341                         while(f.peek() != ')'){
342                                 int child = readNewickInt(f, n, T, tmap);
343                                 if (child == -1) { return -1; } //reports an error in reading
344                 //cout << "child = " << child << endl;          
345                                 childrenNodes.push_back(child);
346                                 
347                                 //after a child you either have , or ), check for both
348                                 if(f.peek()==')'){  break;  }
349                                 else if (f.peek()==',') {   readSpecialChar(f,',',"comma");  }
350                                 else {;}
351                         }
352         //cout << childrenNodes.size() << endl;         
353                         if (childrenNodes.size() < 2) {  m->mothurOut("Error in tree, please correct."); m->mothurOutEndLine(); return -1; }
354                         
355                         //then force into 2 node structure
356                         for (int i = 1; i < childrenNodes.size(); i++) {
357                         
358                                 int lc, rc;
359                                 if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; }
360                                 else { lc = n-1; rc = childrenNodes[i]; }
361                         //cout << i << '\t' << lc << '\t' << rc << endl;        
362                                 T->tree[n].setChildren(lc,rc);
363                                 T->tree[lc].setParent(n);
364                                 T->tree[rc].setParent(n);
365                                 
366                                 //T->printTree(); cout << endl;
367                                 n++;
368                         }
369                         
370                         //to account for extra ++ in looping
371                         n--;
372                         
373                         if(f.peek()==')'){      
374                                 readSpecialChar(f,')',"right parenthesis");     
375                                 //to pass over labels in trees
376                                 c=filehandle.get();
377                                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')&& (c!=')')){ c=filehandle.get(); }
378                                 filehandle.putback(c);
379                         }                       
380                 
381                         if(f.peek() == ':'){                                                                          
382                                 readSpecialChar(f,':',"colon"); 
383                                                                                 
384                                 if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
385                                 
386                                 T->tree[n].setBranchLength(readBranchLength(f));
387                         }else{
388                                 T->tree[n].setBranchLength(0.0); 
389                         }                                               
390                         
391                         //T->tree[n].setChildren(lc,rc);
392                         //T->tree[lc].setParent(n);
393                         //T->tree[rc].setParent(n);
394                         //T->printTree();  cout << endl;
395                         
396                         return n++;
397                 
398                 }else{
399                         f.putback(c);
400                         string name = "";
401                         char d=f.get();
402                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
403                                 name += d;
404                                 d=f.get();
405                         }
406 //cout << name << endl;
407                         int blen = 0;
408                         if(d == ':')    {               blen = 1;       }               
409                 
410                         f.putback(d);
411                 
412                         //set group info
413                         string group = tmap->getGroup(name);
414                         
415                         //find index in tree of name
416                         int n1 = T->getIndex(name);
417                         
418                         //adds sequence names that are not in group file to the "xxx" group
419                         if(group == "not found") {
420                                 m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n");  //readOk = -1; return n1;
421                                 
422                                 tmap->namesOfSeqs.push_back(name);
423                                 tmap->treemap[name].groupname = "xxx";
424                                 
425                                 map<string, int>::iterator it;
426                                 it = tmap->seqsPerGroup.find("xxx");
427                                 if (it == tmap->seqsPerGroup.end()) { //its a new group
428                                         tmap->addGroup("xxx");
429                                         tmap->seqsPerGroup["xxx"] = 1;
430                                 }else {
431                                         tmap->seqsPerGroup["xxx"]++;
432                                 }
433                                 
434                                 group = "xxx";
435                         }
436                         
437                         vector<string> tempGroup; tempGroup.push_back(group);
438                         
439                         T->tree[n1].setGroup(tempGroup);
440                         T->tree[n1].setChildren(-1,-1);
441                 
442                         if(blen == 1){  
443                                 f.get();
444                                 T->tree[n1].setBranchLength(readBranchLength(f));
445                         }else{
446                                 T->tree[n1].setBranchLength(0.0);
447                         }
448                 
449                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
450         
451                         f.putback(c);
452                 
453                         return n1;
454                 }
455         }
456         catch(exception& e) {
457                 m->errorOut(e, "ReadNewickTree", "readNewickInt");
458                 exit(1);
459         }
460 }
461 /**************************************************************************************************/
462 /**************************************************************************************************/
463