]> git.donarmstrong.com Git - mothur.git/blob - readtree.cpp
working on chimeras
[mothur.git] / readtree.cpp
1 /*
2  *  readtree.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "readtree.h"
11
12 /***********************************************************************/
13 ReadTree::ReadTree() {
14         try {
15                 globaldata = GlobalData::getInstance();
16                 globaldata->gTree.clear();
17         }
18         catch(exception& e) {
19                 errorOut(e, "ReadTree", "ReadTree");
20                 exit(1);
21         }
22 }
23 /***********************************************************************/
24 int ReadTree::readSpecialChar(istream& f, char c, string name) {
25     try {
26         
27                 gobble(f);
28                 char d = f.get();
29         
30                 if(d == EOF){
31                         mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
32                         exit(1);
33                 }
34                 if(d != c){
35                         mothurOut("Error: Expected " + name + " in input file.  Found " + toString(d) + ".\n");
36                         exit(1);
37                 }
38                 if(d == ')' && f.peek() == '\n'){
39                         gobble(f);
40                 }       
41                 return d;
42         }
43         catch(exception& e) {
44                 errorOut(e, "ReadTree", "readSpecialChar");
45                 exit(1);
46         }
47 }
48 /**************************************************************************************************/
49
50 int ReadTree::readNodeChar(istream& f) {
51         try {
52 //              while(isspace(d=f.get()))               {;}
53                 gobble(f);
54                 char d = f.get();
55
56                 if(d == EOF){
57                         mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
58                         exit(1);
59                 }
60                 return d;
61         }
62         catch(exception& e) {
63                 errorOut(e, "ReadTree", "readNodeChar");
64                 exit(1);
65         }
66 }
67
68 /**************************************************************************************************/
69
70 float ReadTree::readBranchLength(istream& f) {
71     try {
72                 float b;
73         
74                 if(!(f >> b)){
75                         mothurOut("Error: Missing branch length in input tree.\n");
76                         exit(1);
77                 }
78                 gobble(f);
79                 return b;
80         }
81         catch(exception& e) {
82                 errorOut(e, "ReadTree", "readBranchLength");
83                 exit(1);
84         }
85 }
86
87 /***********************************************************************/
88 /***********************************************************************/
89
90 //Child Classes Below
91
92 /***********************************************************************/
93 /***********************************************************************/
94 //This class reads a file in Newick form and stores it in a tree.
95
96 int ReadNewickTree::read() {
97         try {
98                 holder = "";
99                 int c, error;
100                 int comment = 0;
101                 
102                 //if you are not a nexus file 
103                 if ((c = filehandle.peek()) != '#') {  
104                         while((c = filehandle.peek()) != EOF) { 
105                                 while ((c = filehandle.peek()) != EOF) {
106                                         // get past comments
107                                         if(c == '[') {
108                                                 comment = 1;
109                                         }
110                                         if(c == ']'){
111                                                 comment = 0;
112                                         }
113                                         if((c == '(') && (comment != 1)){ break; }
114                                         filehandle.get();
115                                 }
116
117                                 //make new tree
118                                 T = new Tree(); 
119
120                                 numNodes = T->getNumNodes();
121                                 numLeaves = T->getNumLeaves();
122                                 
123                                 error = readTreeString(); 
124                                 
125                                 //save trees for later commands
126                                 globaldata->gTree.push_back(T); 
127                                 gobble(filehandle);
128                         }
129                 //if you are a nexus file
130                 }else if ((c = filehandle.peek()) == '#') {
131                         nexusTranslation();  //reads file through the translation and updates treemap
132                         while((c = filehandle.peek()) != EOF) { 
133                                 // get past comments
134                                 while ((c = filehandle.peek()) != EOF) {        
135                                         if(holder == "[" || holder == "[!"){
136                                                 comment = 1;
137                                         }
138                                         if(holder == "]"){
139                                                 comment = 0;
140                                         }
141                                         if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
142                                         filehandle >> holder;
143                                 }
144                         
145                                 //pass over the "tree rep.6878900 = "
146                                 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
147                                         
148                                 if (c == EOF ) { break; }
149                                 filehandle.putback(c);  //put back first ( of tree.
150                                 
151                                 //make new tree
152                                 T = new Tree(); 
153                                 numNodes = T->getNumNodes();
154                                 numLeaves = T->getNumLeaves();
155                                 
156                                 //read tree info
157                                 error = readTreeString(); 
158                                  
159                                 //save trees for later commands
160                                 globaldata->gTree.push_back(T); 
161                         }
162                 }
163                 
164                 if (error != 0) { readOk = error; } 
165                 
166                 filehandle.close();
167                 return readOk;
168         }
169         catch(exception& e) {
170                 errorOut(e, "ReadNewickTree", "read");
171                 exit(1);
172         }
173 }
174 /**************************************************************************************************/
175 //This function read the file through the translation of the sequences names and updates treemap.
176 void ReadNewickTree::nexusTranslation() {
177         try {
178                 
179                 holder = "";
180                 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were
181                 int comment = 0;
182                 
183                 // get past comments
184                 while(holder != "translate" && holder != "Translate"){  
185                         if(holder == "[" || holder == "[!"){
186                                 comment = 1;
187                         }
188                         if(holder == "]"){
189                                 comment = 0;
190                         }
191                         filehandle >> holder; 
192                         if(holder == "tree" && comment != 1){return;}
193                 }
194                 
195                 //update treemap
196                 globaldata->gTreemap->namesOfSeqs.clear();
197                 for(int i=0;i<numSeqs;i++){
198                         string number, name;
199                         filehandle >> number;
200                         filehandle >> name;
201                         name.erase(name.end()-1);  //erase the comma
202                         //insert new one with new name
203                         globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
204                         globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
205                         //erase old one.  so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
206                         globaldata->gTreemap->treemap.erase(name);
207                         globaldata->gTreemap->namesOfSeqs.push_back(number);
208                 }
209         }
210         catch(exception& e) {
211                 errorOut(e, "ReadNewickTree", "nexusTranslation");
212                 exit(1);
213         }
214 }
215
216 /**************************************************************************************************/
217 int ReadNewickTree::readTreeString() {
218         try {
219                 
220                 int n = 0;
221                 int lc, rc; 
222                 
223                 int rooted = 0;
224         
225                 int ch = filehandle.peek();     
226                 
227                 if(ch == '('){
228                         n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
229
230                         lc = readNewickInt(filehandle, n, T);
231                         if (lc == -1) { mothurOut("error with lc"); mothurOutEndLine(); return -1; } //reports an error in reading
232                 
233                         if(filehandle.peek()==','){                                                     
234                                 readSpecialChar(filehandle,',',"comma");
235                         }
236                         // ';' means end of tree.                                                                                               
237                         else if((ch=filehandle.peek())==';' || ch=='['){                
238                                 rooted = 1;                                                                     
239                         }                                                                                               
240                         if(rooted != 1){                                                                
241                                 rc = readNewickInt(filehandle, n, T);
242                                 if (rc == -1) { mothurOut("error with rc"); mothurOutEndLine(); return -1; } //reports an error in reading
243                                 if(filehandle.peek() == ')'){                                   
244                                         readSpecialChar(filehandle,')',"right parenthesis");
245                                 }                                                                                       
246                         }                                                                                               
247                 }
248                 //note: treeclimber had the code below added - not sure why?
249                 else{
250                         filehandle.putback(ch);
251                         char name[MAX_LINE];
252                         filehandle.get(name, MAX_LINE,'\n');
253                         SKIPLINE(filehandle, ch);
254                 
255                         n = T->getIndex(name);
256
257                         if(n!=0){
258                                 mothurOut("Internal error: The only taxon is not taxon 0.\n");
259                                 //exit(1);
260                                 readOk = -1; return -1;
261                         }
262                         lc = rc = -1;
263                 } 
264                 
265                 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}                                            
266                 if(rooted != 1){                                                                        
267                         T->tree[n].setChildren(lc,rc);
268                         T->tree[n].setBranchLength(0);
269                         T->tree[n].setParent(-1);
270                         if(lc!=-1){             T->tree[lc].setParent(n);               }
271                         if(rc!=-1){             T->tree[rc].setParent(n);               }
272                 }
273                 return 0;
274         
275         }
276         catch(exception& e) {
277                 errorOut(e, "ReadNewickTree", "readTreeString");
278                 exit(1);
279         }
280 }
281 /**************************************************************************************************/
282
283 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
284         try {
285                 int c = readNodeChar(f);
286     
287                 if(c == '('){
288                         int lc = readNewickInt(f, n, T);
289                         if (lc == -1) { return -1; } //reports an error in reading
290                         readSpecialChar(f,',',"comma");
291
292                         int rc = readNewickInt(f, n, T);
293                         if (rc == -1) { return -1; }  //reports an error in reading     
294                         if(f.peek()==')'){      
295                                 readSpecialChar(f,')',"right parenthesis");     
296                                 //to pass over labels in trees
297                                 c=filehandle.get();
298                                 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
299                                 filehandle.putback(c);
300
301                         }                       
302                 
303                         if(f.peek() == ':'){                                                                          
304                                 readSpecialChar(f,':',"colon"); 
305                                                                                 
306                                 if(n >= numNodes){      mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
307                                 
308                                 T->tree[n].setBranchLength(readBranchLength(f));
309                         }else{
310                                 T->tree[n].setBranchLength(0.0); 
311                         }                                               
312                 
313                         T->tree[n].setChildren(lc,rc);
314                         T->tree[lc].setParent(n);
315                         T->tree[rc].setParent(n);
316                 
317                         return n++;
318                 }else{
319                         f.putback(c);
320                         string name = "";
321                         char d=f.get();
322                         while(d != ':' && d != ',' && d!=')' && d!='\n'){                                       
323                                 name += d;
324                                 d=f.get();
325                         }
326                 
327                         int blen = 0;
328                         if(d == ':')    {               blen = 1;       }               
329                 
330                         f.putback(d);
331                 
332                         //set group info
333                         string group = globaldata->gTreemap->getGroup(name);
334                         
335                         //find index in tree of name
336                         int n1 = T->getIndex(name);
337                         
338                         //adds sequence names that are not in group file to the "xxx" group
339                         if(group == "not found") {
340                                 mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n");  //readOk = -1; return n1;
341                                 
342                                 globaldata->gTreemap->namesOfSeqs.push_back(name);
343                                 globaldata->gTreemap->treemap[name].groupname = "xxx";
344                                 
345                                 map<string, int>::iterator it;
346                                 it = globaldata->gTreemap->seqsPerGroup.find("xxx");
347                                 if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
348                                         globaldata->gTreemap->namesOfGroups.push_back("xxx");
349                                         globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
350                                 }else {
351                                         globaldata->gTreemap->seqsPerGroup["xxx"]++;
352                                 }
353                                 
354                                 group = "xxx";
355                         }
356                         
357                         vector<string> tempGroup; tempGroup.push_back(group);
358                         
359                         T->tree[n1].setGroup(tempGroup);
360                         T->tree[n1].setChildren(-1,-1);
361                 
362                         if(blen == 1){  
363                                 f.get();
364                                 T->tree[n1].setBranchLength(readBranchLength(f));
365                         }else{
366                                 T->tree[n1].setBranchLength(0.0);
367                         }
368                 
369                         while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
370                         f.putback(c);
371                 
372                         return n1;
373                 }
374         }
375         catch(exception& e) {
376                 errorOut(e, "ReadNewickTree", "readNewickInt");
377                 exit(1);
378         }
379 }
380 /**************************************************************************************************/
381 /**************************************************************************************************/
382