]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
added logfile feature
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 globaldata->gTree.clear();
17         }
18         catch(exception& e) {
19                 errorOut(e, "ReadTree", "ReadTree");
20                 exit(1);
21         }
22 }
23 /***********************************************************************/
24 int ReadTree::readSpecialChar(istream& f, char c, string name) {
25     try {
26         
27                 gobble(f);
28                 char d = f.get();
29         
30                 if(d == EOF){
31                         mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
32                         exit(1);
33                 }
34                 if(d != c){
35                         mothurOut("Error: Expected " + name + " in input file.  Found " + toString(d) + ".\n");
36                         exit(1);
37                 }
38                 if(d == ')' && f.peek() == '\n'){
39                         gobble(f);
40                 }       
41                 return d;
42         }
43         catch(exception& e) {
44                 errorOut(e, "ReadTree", "readSpecialChar");
45                 exit(1);
46         }
47 }
48 /**************************************************************************************************/
49
50 int ReadTree::readNodeChar(istream& f) {
51         try {
52 //              while(isspace(d=f.get()))               {;}
53                 gobble(f);
54                 char d = f.get();
55
56                 if(d == EOF){
57                         mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
58                         exit(1);
59                 }
60                 return d;
61         }
62         catch(exception& e) {
63                 errorOut(e, "ReadTree", "readNodeChar");
64                 exit(1);
65         }
66 }
67
68 /**************************************************************************************************/
69
70 float ReadTree::readBranchLength(istream& f) {
71     try {
72                 float b;
73         
74                 if(!(f >> b)){
75                         mothurOut("Error: Missing branch length in input tree.\n");
76                         exit(1);
77                 }
78                 gobble(f);
79                 return b;
80         }
81         catch(exception& e) {
82                 errorOut(e, "ReadTree", "readBranchLength");
83                 exit(1);
84         }
85 }
86
87 /***********************************************************************/
88 /***********************************************************************/
89
90 //Child Classes Below
91
92 /***********************************************************************/
93 /***********************************************************************/
94 //This class reads a file in Newick form and stores it in a tree.
95
96 int ReadNewickTree::read() {
97         try {
98                 holder = "";
99                 int c, error;
100                 int comment = 0;
101                 
102                 //if you are not a nexus file 
103                 if ((c = filehandle.peek()) != '#') {  
104                         while((c = filehandle.peek()) != EOF) { 
105                                 while ((c = filehandle.peek()) != EOF) {
106                                         // get past comments
107                                         if(c == '[') {
108                                                 comment = 1;
109                                         }
110                                         if(c == ']'){
111                                                 comment = 0;
112                                         }
113                                         if((c == '(') && (comment != 1)){ break; }
114                                         filehandle.get();
115                                 }
116
117                                 //make new tree
118                                 T = new Tree(); 
119                                 numNodes = T->getNumNodes();
120                                 numLeaves = T->getNumLeaves();
121                                 
122                                 error = readTreeString(); 
123                                 
124                                 //save trees for later commands
125                                 globaldata->gTree.push_back(T); 
126                                 gobble(filehandle);
127                         }
128                 //if you are a nexus file
129                 }else if ((c = filehandle.peek()) == '#') {
130                         nexusTranslation();  //reads file through the translation and updates treemap
131                         while((c = filehandle.peek()) != EOF) { 
132                                 // get past comments
133                                 while ((c = filehandle.peek()) != EOF) {        
134                                         if(holder == "[" || holder == "[!"){
135                                                 comment = 1;
136                                         }
137                                         if(holder == "]"){
138                                                 comment = 0;
139                                         }
140                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
141                                         filehandle >> holder;
142                                 }
143                         
144                                 //pass over the "tree rep.6878900 = "
145                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
146                                         
147                                 if (c == EOF ) { break; }
148                                 filehandle.putback(c);  //put back first ( of tree.
149                                 
150                                 //make new tree
151                                 T = new Tree(); 
152                                 numNodes = T->getNumNodes();
153                                 numLeaves = T->getNumLeaves();
154                                 
155                                 //read tree info
156                                 error = readTreeString(); 
157                                  
158                                 //save trees for later commands
159                                 globaldata->gTree.push_back(T); 
160                         }
161                 }
162                 
163                 if (error != 0) { readOk = error; } 
164                 
165                 filehandle.close();
166                 return readOk;
167         }
168         catch(exception& e) {
169                 errorOut(e, "ReadNewickTree", "read");
170                 exit(1);
171         }
172 }
173 /**************************************************************************************************/
174 //This function read the file through the translation of the sequences names and updates treemap.
175 void ReadNewickTree::nexusTranslation() {
176         try {
177                 
178                 holder = "";
179                 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
180                 int comment = 0;
181                 
182                 // get past comments
183                 while(holder != "translate" && holder != "Translate"){  
184                         if(holder == "[" || holder == "[!"){
185                                 comment = 1;
186                         }
187                         if(holder == "]"){
188                                 comment = 0;
189                         }
190                         filehandle >> holder; 
191                         if(holder == "tree" && comment != 1){return;}
192                 }
193                 
194                 //update treemap
195                 globaldata->gTreemap->namesOfSeqs.clear();
196                 for(int i=0;i<numSeqs;i++){
197                         string number, name;
198                         filehandle >> number;
199                         filehandle >> name;
200                         name.erase(name.end()-1);  //erase the comma
201                         //insert new one with new name
202                         globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
203                         globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
204                         //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
205                         globaldata->gTreemap->treemap.erase(name);
206                         globaldata->gTreemap->namesOfSeqs.push_back(number);
207                 }
208         }
209         catch(exception& e) {
210                 errorOut(e, "ReadNewickTree", "nexusTranslation");
211                 exit(1);
212         }
213 }
214
215 /**************************************************************************************************/
216 int ReadNewickTree::readTreeString() {
217         try {
218                 
219                 int n = 0;
220                 int lc, rc; 
221                 
222                 int rooted = 0;
223         
224                 int ch = filehandle.peek();     
225                 
226                 if(ch == '('){
227                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
228
229                         lc = readNewickInt(filehandle, n, T);
230                         if (lc == -1) { mothurOut("error with lc"); mothurOutEndLine(); return -1; } //reports an error in reading
231                 
232                         if(filehandle.peek()==','){                                                     
233                                 readSpecialChar(filehandle,',',"comma");
234                         }
235                         // ';' means end of tree.                                                                                               
236                         else if((ch=filehandle.peek())==';' || ch=='['){                
237                                 rooted = 1;                                                                     
238                         }                                                                                               
239                         if(rooted != 1){                                                                
240                                 rc = readNewickInt(filehandle, n, T);
241                                 if (rc == -1) { mothurOut("error with rc"); mothurOutEndLine(); return -1; } //reports an error in reading
242                                 if(filehandle.peek() == ')'){                                   
243                                         readSpecialChar(filehandle,')',"right parenthesis");
244                                 }                                                                                       
245                         }                                                                                               
246                 }
247                 //note: treeclimber had the code below added - not sure why?
248                 else{
249                         filehandle.putback(ch);
250                         char name[MAX_LINE];
251                         filehandle.get(name, MAX_LINE,'\n');
252                         SKIPLINE(filehandle, ch);
253                 
254                         n = T->getIndex(name);
255
256                         if(n!=0){
257                                 mothurOut("Internal error: The only taxon is not taxon 0.\n");
258                                 //exit(1);
259                                 readOk = -1; return -1;
260                         }
261                         lc = rc = -1;
262                 } 
263                 
264                 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}                                            
265                 if(rooted != 1){                                                                        
266                         T->tree[n].setChildren(lc,rc);
267                         T->tree[n].setBranchLength(0);
268                         T->tree[n].setParent(-1);
269                         if(lc!=-1){             T->tree[lc].setParent(n);               }
270                         if(rc!=-1){             T->tree[rc].setParent(n);               }
271                 }
272                 return 0;
273         
274         }
275         catch(exception& e) {
276                 errorOut(e, "ReadNewickTree", "readTreeString");
277                 exit(1);
278         }
279 }
280 /**************************************************************************************************/
281
282 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
283         try {
284                 int c = readNodeChar(f);
285     
286                 if(c == '('){
287                         int lc = readNewickInt(f, n, T);
288                         if (lc == -1) { return -1; } //reports an error in reading
289                         readSpecialChar(f,',',"comma");
290
291                         int rc = readNewickInt(f, n, T);
292                         if (rc == -1) { return -1; }  //reports an error in reading     
293                         if(f.peek()==')'){      
294                                 readSpecialChar(f,')',"right parenthesis");     
295                                 //to pass over labels in trees
296                                 c=filehandle.get();
297                                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
298                                 filehandle.putback(c);
299
300                         }                       
301                 
302                         if(f.peek() == ':'){                                                                          
303                                 readSpecialChar(f,':',"colon"); 
304                                                                                 
305                                 if(n >= numNodes){      mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
306                                 
307                                 T->tree[n].setBranchLength(readBranchLength(f));
308                         }else{
309                                 T->tree[n].setBranchLength(0.0); 
310                         }                                               
311                 
312                         T->tree[n].setChildren(lc,rc);
313                         T->tree[lc].setParent(n);
314                         T->tree[rc].setParent(n);
315                 
316                         return n++;
317                 }else{
318                         f.putback(c);
319                         string name = "";
320                         char d=f.get();
321                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
322                                 name += d;
323                                 d=f.get();
324                         }
325                 
326                         int blen = 0;
327                         if(d == ':')    {               blen = 1;       }               
328                 
329                         f.putback(d);
330                 
331                         //set group info
332                         string group = globaldata->gTreemap->getGroup(name);
333                         
334                         //find index in tree of name
335                         int n1 = T->getIndex(name);
336                         
337                         //adds sequence names that are not in group file to the "xxx" group
338                         if(group == "not found") {
339                                 mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n");  //readOk = -1; return n1;
340                                 
341                                 globaldata->gTreemap->namesOfSeqs.push_back(name);
342                                 globaldata->gTreemap->treemap[name].groupname = "xxx";
343                                 
344                                 map<string, int>::iterator it;
345                                 it = globaldata->gTreemap->seqsPerGroup.find("xxx");
346                                 if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
347                                         globaldata->gTreemap->namesOfGroups.push_back("xxx");
348                                         globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
349                                 }else {
350                                         globaldata->gTreemap->seqsPerGroup["xxx"]++;
351                                 }
352                                 
353                                 group = "xxx";
354                         }
355                         
356                         T->tree[n1].setGroup(group);
357                         T->tree[n1].setChildren(-1,-1);
358                 
359                         if(blen == 1){  
360                                 f.get();
361                                 T->tree[n1].setBranchLength(readBranchLength(f));
362                         }else{
363                                 T->tree[n1].setBranchLength(0.0);
364                         }
365                 
366                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
367                         f.putback(c);
368                 
369                         return n1;
370                 }
371         }
372         catch(exception& e) {
373                 errorOut(e, "ReadNewickTree", "readNewickInt");
374                 exit(1);
375         }
376 }
377 /**************************************************************************************************/
378 /**************************************************************************************************/
379