5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /***********************************************************************/
// Constructor: fetches the GlobalData object through GlobalData::getInstance()
// and empties its gTree container.
// NOTE(review): the try/catch lines are not visible in this excerpt; the
// errorOut() call below presumably sits inside a catch(exception& e) handler,
// as it does in the other methods of this file.
13 ReadTree::ReadTree() {
15 globaldata = GlobalData::getInstance();
// gTree is the container that ReadNewickTree::read() later appends trees to.
16 globaldata->gTree.clear();
19 errorOut(e, "ReadTree", "ReadTree");
23 /***********************************************************************/
// Reads one expected punctuation character from the stream.
//   f    - input stream being parsed
//   c    - the character expected next (the comparison itself is not visible
//          in this excerpt)
//   name - human-readable name of that character, used in the error messages
// Problems are reported through mothurOut(); exceptions through errorOut().
// NOTE(review): the return statements are not visible here, so the meaning of
// the int return value cannot be documented from this excerpt.
24 int ReadTree::readSpecialChar(istream& f, char c, string name) {
// Reported when the input runs out before the expected character is found.
31 mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
// Reported when a character was read (d) but it is not the expected one.
35 mothurOut("Error: Expected " + name + " in input file. Found " + toString(d) + ".\n");
// Special case: a ')' followed directly by a newline. The action taken is not
// visible in this excerpt.
38 if(d == ')' && f.peek() == '\n'){
44 errorOut(e, "ReadTree", "readSpecialChar");
48 /**************************************************************************************************/
// Reads the next character that starts a node from stream f.
// ReadNewickTree::readNewickInt() stores the result in an int and goes on to
// parse either an internal node or a leaf.
// NOTE(review): the read and the return statement are not visible in this
// excerpt; presumably the character read is returned — confirm.
50 int ReadTree::readNodeChar(istream& f) {
// Disabled whitespace-skipping loop, kept from the original source.
52 // while(isspace(d=f.get())) {;}
// Reported when the input ends while a node is still expected.
57 mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
63 errorOut(e, "ReadTree", "readNodeChar");
68 /**************************************************************************************************/
// Reads a branch length from stream f and returns it as a float.
// Callers pass the result straight to setBranchLength().
// NOTE(review): the extraction and the failure check are not visible in this
// excerpt; the message below is presumably emitted when no number can be read.
70 float ReadTree::readBranchLength(istream& f) {
75 mothurOut("Error: Missing branch length in input tree.\n");
82 errorOut(e, "ReadTree", "readBranchLength");
87 /***********************************************************************/
88 /***********************************************************************/
92 /***********************************************************************/
93 /***********************************************************************/
94 //This class reads a file in Newick form and stores it in a tree.
// Reads all trees from filehandle and appends each parsed tree T to
// globaldata->gTree. Two layouts are handled, chosen by the first character
// of the file: anything other than '#' is read as plain Newick, '#' is read
// as a nexus file.
// A non-zero result from readTreeString() is copied into readOk.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably readOk is returned — confirm.
96 int ReadNewickTree::read() {
102 //if you are not a nexus file
103 if ((c = filehandle.peek()) != '#') {
// Outer loop: one iteration per tree until end of file.
104 while((c = filehandle.peek()) != EOF) {
// Inner loop: skip forward to the '(' that opens the next tree, ignoring
// anything seen while the comment flag is set.
105 while ((c = filehandle.peek()) != EOF) {
113 if((c == '(') && (comment != 1)){ break; }
// Cache the node and leaf counts of T; readTreeString() starts numbering at
// numLeaves and readNewickInt() checks indices against numNodes.
// (The construction of T is not visible in this excerpt.)
120 numNodes = T->getNumNodes();
121 numLeaves = T->getNumLeaves();
123 error = readTreeString();
125 //save trees for later commands
126 globaldata->gTree.push_back(T);
129 //if you are a nexus file
// NOTE(review): this condition is the exact negation of the first branch, so a
// plain else would be equivalent.
130 }else if ((c = filehandle.peek()) == '#') {
131 nexusTranslation(); //reads file through the translation and updates treemap
132 while((c = filehandle.peek()) != EOF) {
// Skip whitespace-separated tokens until a "tree" or "end;" keyword is found
// outside of a comment.
134 while ((c = filehandle.peek()) != EOF) {
// "[" or "[!" opens a comment (the flag update is not visible in this excerpt).
135 if(holder == "[" || holder == "[!"){
141 if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
142 filehandle >> holder;
145 //pass over the "tree rep.6878900 = "
// NOTE(review): when get() returns something other than '(', c is overwritten
// by the peek() result, so after the loop c is either '(' or EOF.
146 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
148 if (c == EOF ) { break; }
149 filehandle.putback(c); //put back first ( of tree.
153 numNodes = T->getNumNodes();
154 numLeaves = T->getNumLeaves();
157 error = readTreeString();
159 //save trees for later commands
160 globaldata->gTree.push_back(T);
// Record a failed parse in readOk.
164 if (error != 0) { readOk = error; }
169 catch(exception& e) {
170 errorOut(e, "ReadNewickTree", "read");
174 /**************************************************************************************************/
175 //This function reads the file through the translation of the sequence names and updates treemap.
// Advances filehandle to the nexus "translate" block and re-keys the treemap
// entries from sequence name to translation number. For every sequence the
// groupname and vectorIndex are copied to the new key, the old key is erased,
// and namesOfSeqs is rebuilt from the numbers.
// Returns early, without translating, if a "tree" keyword is met first.
176 void ReadNewickTree::nexusTranslation() {
180 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this so that when we clear the old names we still know how many sequences there were
// Read tokens until the translate keyword is found.
184 while(holder != "translate" && holder != "Translate"){
// "[" or "[!" opens a comment (the flag update is not visible in this excerpt).
185 if(holder == "[" || holder == "[!"){
191 filehandle >> holder;
// No translate block in this file: nothing to rename.
192 if(holder == "tree" && comment != 1){return;}
// The old names are dropped here and replaced by the numbers pushed below.
196 globaldata->gTreemap->namesOfSeqs.clear();
197 for(int i=0;i<numSeqs;i++){
199 filehandle >> number;
// NOTE(review): the read of `name` is not visible in this excerpt. This erase
// assumes name is non-empty and ends with the separator — confirm.
201 name.erase(name.end()-1); //erase the comma
202 //insert new one with new name
// NOTE(review): if treemap is a std::map, treemap[name] inserts a default
// entry when name is missing — verify names always exist at this point.
203 globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
204 globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
205 //erase old one. so treemap[sarah].groupname is now treemap[1].groupname, if number is 1 and name is sarah.
206 globaldata->gTreemap->treemap.erase(name);
207 globaldata->gTreemap->namesOfSeqs.push_back(number);
210 catch(exception& e) {
211 errorOut(e, "ReadNewickTree", "nexusTranslation");
216 /**************************************************************************************************/
// Parses one tree from filehandle into T, starting at the root.
// The left and right subtrees are read with readNewickInt(); the root node n
// is then linked to them with branch length 0 and parent -1.
// Returns -1 when a subtree fails to parse or the single-taxon check fails.
// NOTE(review): the success return is not visible in this excerpt.
217 int ReadNewickTree::readTreeString() {
225 int ch = filehandle.peek();
228 n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off
// Left subtree.
230 lc = readNewickInt(filehandle, n, T);
231 if (lc == -1) { mothurOut("error with lc"); mothurOutEndLine(); return -1; } //reports an error in reading
// Consume the comma separating the two subtrees, if present.
233 if(filehandle.peek()==','){
234 readSpecialChar(filehandle,',',"comma");
236 // ';' means end of tree.
237 else if((ch=filehandle.peek())==';' || ch=='['){
// Right subtree.
241 rc = readNewickInt(filehandle, n, T);
242 if (rc == -1) { mothurOut("error with rc"); mothurOutEndLine(); return -1; } //reports an error in reading
243 if(filehandle.peek() == ')'){
244 readSpecialChar(filehandle,')',"right parenthesis");
248 //note: treeclimber had the code below added - not sure why?
// Reads a name up to the end of the line and looks up its index in T.
// Presumably this is the path for a tree with a single taxon — confirm; the
// branch condition is not visible in this excerpt.
250 filehandle.putback(ch);
252 filehandle.get(name, MAX_LINE,'\n');
253 SKIPLINE(filehandle, ch);
255 n = T->getIndex(name);
258 mothurOut("Internal error: The only taxon is not taxon 0.\n");
260 readOk = -1; return -1;
// Discard everything up to and including the terminating ';' (or end of file).
265 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}
// Link the root: children lc/rc, no branch length, no parent.
267 T->tree[n].setChildren(lc,rc);
268 T->tree[n].setBranchLength(0);
269 T->tree[n].setParent(-1);
// A child index of -1 means "no child", so it is not dereferenced.
270 if(lc!=-1){ T->tree[lc].setParent(n); }
271 if(rc!=-1){ T->tree[rc].setParent(n); }
276 catch(exception& e) {
277 errorOut(e, "ReadNewickTree", "readTreeString");
281 /**************************************************************************************************/
// Recursively parses one node of a Newick tree and records it in T->tree.
//   f - stream being parsed
//   n - index for the next internal node; passed by reference through the
//       recursion (its update is not visible in this excerpt)
//   T - tree being filled in
// Internal nodes recurse for a left and a right child; leaves are looked up by
// name, and names missing from the group file are assigned to group "xxx".
// Returns -1 on error. Callers use the result as an index into T->tree, so the
// success value is presumably the index of the node parsed — confirm; the
// return statements are not visible in this excerpt.
283 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
285 int c = readNodeChar(f);
// Internal node: left child, comma, right child, closing parenthesis.
288 int lc = readNewickInt(f, n, T);
289 if (lc == -1) { return -1; } //reports an error in reading
290 readSpecialChar(f,',',"comma");
292 int rc = readNewickInt(f, n, T);
293 if (rc == -1) { return -1; } //reports an error in reading
295 readSpecialChar(f,')',"right parenthesis");
296 //to pass over labels in trees
// NOTE(review): these two lines read from the member filehandle rather than the
// parameter f. Both visible callers pass filehandle, so they coincide today.
298 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
299 filehandle.putback(c);
304 readSpecialChar(f,':',"colon");
// Guard against writing past the nodes allocated in T.
306 if(n >= numNodes){ mothurOut("Error: Too many nodes in input tree\n"); readOk = -1; return -1; }
// Branch length is read from the file, or set to 0.0 otherwise
// (the selecting condition is not visible in this excerpt).
308 T->tree[n].setBranchLength(readBranchLength(f));
310 T->tree[n].setBranchLength(0.0);
// Link this internal node to its two children.
313 T->tree[n].setChildren(lc,rc);
314 T->tree[lc].setParent(n);
315 T->tree[rc].setParent(n);
// Leaf: loop over characters until a delimiter is reached (the loop body that
// builds the name is not visible in this excerpt).
322 while(d != ':' && d != ',' && d!=')' && d!='\n'){
// A ':' delimiter sets blen; presumably it flags that a branch length follows.
328 if(d == ':') { blen = 1; }
333 string group = globaldata->gTreemap->getGroup(name);
335 //find index in tree of name
336 int n1 = T->getIndex(name);
338 //adds sequence names that are not in group file to the "xxx" group
339 if(group == "not found") {
340 mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n"); //readOk = -1; return n1;
342 globaldata->gTreemap->namesOfSeqs.push_back(name);
343 globaldata->gTreemap->treemap[name].groupname = "xxx";
// Create the "xxx" group on first use, otherwise bump its count
// (the else separating the two cases is not visible in this excerpt).
345 map<string, int>::iterator it;
346 it = globaldata->gTreemap->seqsPerGroup.find("xxx");
347 if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
348 globaldata->gTreemap->namesOfGroups.push_back("xxx");
349 globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
351 globaldata->gTreemap->seqsPerGroup["xxx"]++;
// Record the leaf: its group, no children, and its branch length.
357 T->tree[n1].setGroup(group);
358 T->tree[n1].setChildren(-1,-1);
362 T->tree[n1].setBranchLength(readBranchLength(f));
364 T->tree[n1].setBranchLength(0.0);
// NOTE(review): the loop tests the result of get() against 0, not EOF. At end
// of stream get() returns EOF, which matches none of these tests — confirm the
// loop cannot spin on truncated input.
367 while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') ) {;}
373 catch(exception& e) {
374 errorOut(e, "ReadNewickTree", "readNewickInt");
378 /**************************************************************************************************/
379 /**************************************************************************************************/