5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /***********************************************************************/
// Constructor: obtains the GlobalData and MothurOut instances through their
// getInstance() accessors and clears globaldata->gTree.
13 ReadTree::ReadTree() {
15 globaldata = GlobalData::getInstance();
16 m = MothurOut::getInstance();
// gTree is the container that ReadNewickTree::read() pushes parsed trees into;
// it is emptied here so a new reader starts without previously stored trees.
17 globaldata->gTree.clear();
// Report e through MothurOut, tagged with the class name and function name.
20 m->errorOut(e, "ReadTree", "ReadTree");
24 /***********************************************************************/
// Reads a punctuation character from f.
//   f    - input stream being parsed.
//   c    - the character the caller expects (callers in this file pass ',', ')' and ':').
//   name - human-readable name of that character, used only in the error messages.
// NOTE(review): presumably the character read (d) is compared against c and an
// error code is returned on mismatch — confirm the return-value convention.
25 int ReadTree::readSpecialChar(istream& f, char c, string name) {
// Reported when the input runs out before the expected character is found.
32 m->mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
// Reported when a character d other than the expected one is found.
36 m->mothurOut("Error: Expected " + name + " in input file. Found " + toString(d) + ".\n");
// Special case: a ')' that is immediately followed by a newline in the stream.
39 if(d == ')' && f.peek() == '\n'){
// Report e through MothurOut, tagged with the class name and function name.
45 m->errorOut(e, "ReadTree", "readSpecialChar");
49 /**************************************************************************************************/
// Reads the next node character from f and returns it as an int
// (readNewickInt stores the result in an int and branches on it).
// An error is printed when the input ends where a '(' was expected.
51 int ReadTree::readNodeChar(istream& f) {
// Disabled code: an earlier whitespace-skipping loop, kept for reference.
53 // while(isspace(d=f.get())) {;}
58 m->mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
// Report e through MothurOut, tagged with the class name and function name.
64 m->errorOut(e, "ReadTree", "readNodeChar");
69 /**************************************************************************************************/
// Reads a branch length from f and returns it as a float; callers pass the
// result straight to setBranchLength().
// NOTE(review): the value returned after the "missing branch length" error is
// not visible here — confirm what callers receive in that case.
71 float ReadTree::readBranchLength(istream& f) {
// Reported when no branch length could be read from the stream.
76 m->mothurOut("Error: Missing branch length in input tree.\n");
// Report e through MothurOut, tagged with the class name and function name.
83 m->errorOut(e, "ReadTree", "readBranchLength");
88 /***********************************************************************/
89 /***********************************************************************/
93 /***********************************************************************/
94 /***********************************************************************/
95 //This class reads a file in Newick form and stores it in a tree.
// Reads trees from filehandle, parsing each with readTreeString() and appending
// it to globaldata->gTree. The first character of the file selects the format:
// '#' takes the nexus branch, anything else takes the plain Newick branch.
// NOTE(review): presumably returns readOk — confirm.
97 int ReadNewickTree::read() {
103 //if you are not a nexus file
104 if ((c = filehandle.peek()) != '#') {
// Outer loop: one iteration per tree until the end of the file.
105 while((c = filehandle.peek()) != EOF) {
// Inner loop: skip ahead to the start of the next tree.
106 while ((c = filehandle.peek()) != EOF) {
// Stop at a '(' that is not inside a comment (comment != 1).
114 if((c == '(') && (comment != 1)){ break; }
// Cache the tree's node and leaf counts; readTreeString()/readNewickInt() use them.
121 numNodes = T->getNumNodes();
122 numLeaves = T->getNumLeaves();
124 error = readTreeString();
126 //save trees for later commands
127 globaldata->gTree.push_back(T);
128 m->gobble(filehandle);
130 //if you are a nexus file
131 }else if ((c = filehandle.peek()) == '#') {
132 //get right number of seqs from nexus file.
// NOTE(review): the temporary Tree is created and deleted immediately; presumably
// its constructor has a side effect that is needed before the translation — confirm.
133 Tree* temp = new Tree(); delete temp;
135 nexusTranslation(); //reads file through the translation and updates treemap
// Outer loop: one iteration per tree until the end of the file.
136 while((c = filehandle.peek()) != EOF) {
// Inner loop: read whitespace-separated tokens until a "tree" or "end;" keyword.
138 while ((c = filehandle.peek()) != EOF) {
// "[" and "[!" tokens open a nexus comment.
139 if(holder == "[" || holder == "[!"){
// A keyword outside a comment ends the scan; holder and comment are reset first.
145 if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
146 filehandle >> holder;
149 //pass over the "tree rep.6878900 = "
// get() consumes characters until it returns '('. For any other character, c is
// overwritten with peek(), so c becomes EOF once the input is exhausted.
150 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
152 if (c == EOF ) { break; }
153 filehandle.putback(c); //put back first ( of tree.
// Cache the tree's node and leaf counts; readTreeString()/readNewickInt() use them.
157 numNodes = T->getNumNodes();
158 numLeaves = T->getNumLeaves();
161 error = readTreeString();
163 //save trees for later commands
164 globaldata->gTree.push_back(T);
// A nonzero result from readTreeString() becomes the overall read status.
168 if (error != 0) { readOk = error; }
174 catch(exception& e) {
// Report e through MothurOut, tagged with the class name and function name.
175 m->errorOut(e, "ReadNewickTree", "read");
179 /**************************************************************************************************/
180 //This function reads the file through the translation of the sequence names and updates the treemap.
// Advances filehandle through the nexus "translate" section and re-keys
// globaldata->gTreemap so that entries are indexed by the translation number
// instead of the original sequence name.
// Returns holder ("tree") early if a "tree" keyword is reached outside a comment
// before any translate section.
// NOTE(review): the value returned on the normal path is not visible here — confirm.
181 string ReadNewickTree::nexusTranslation() {
185 int numSeqs = globaldata->Treenames.size(); //must save this so that when we clear the old names we still know how many sequences there were
// Read tokens until the translate keyword (either capitalisation) is found.
189 while(holder != "translate" && holder != "Translate"){
// "[" and "[!" tokens open a nexus comment.
190 if(holder == "[" || holder == "[!"){
196 filehandle >> holder;
197 if(holder == "tree" && comment != 1){return holder;}
// Drop the old name list; it is refilled with the translation numbers below.
201 globaldata->gTreemap->namesOfSeqs.clear();
// Read the translation entries until "tree", ";" or the end of the file.
205 while ((c = filehandle.peek()) != EOF) {
207 filehandle >> number;
209 if ((number == "tree") || (number == ";") ) { name = number; break; }
// Remember the last character of the name so a trailing comma can be stripped.
214 if (name.length() != 0) { lastChar = name[name.length()-1]; }
216 if ((name == "tree") || (name == ";") ) { break; }
218 if (lastChar == ',') { name.erase(name.end()-1); } //erase the comma
// Second form of the loop: exactly numSeqs entries, using the count saved above.
222 for(int i=0;i<numSeqs;i++){
224 filehandle >> number;
// NOTE(review): erases the last character without checking that name is non-empty
// or that it is a comma — confirm this cannot run on an empty name or the final entry.
226 name.erase(name.end()-1); //erase the comma
228 //insert new one with new name
229 string group = globaldata->gTreemap->getGroup(name);
230 globaldata->gTreemap->treemap[toString(number)].groupname = group;
231 globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->getIndex(name);
232 //erase old one, so the entry that was treemap[sarah] is now treemap[1], if number is 1 and name is sarah.
233 globaldata->gTreemap->treemap.erase(name);
234 globaldata->gTreemap->namesOfSeqs.push_back(number);
239 catch(exception& e) {
// Report e through MothurOut, tagged with the class name and function name.
240 m->errorOut(e, "ReadNewickTree", "nexusTranslation");
245 /**************************************************************************************************/
// Parses one tree from filehandle into T. The left and right subtrees are read
// with readNewickInt() and attached to node n with branch length 0 and no parent.
// Returns -1 when either subtree fails to parse or when the single-taxon check fails.
// NOTE(review): the success return value is not visible here — confirm.
246 int ReadNewickTree::readTreeString() {
254 int ch = filehandle.peek();
257 n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off
// Left child subtree.
259 lc = readNewickInt(filehandle, n, T);
260 if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
// A comma after the left subtree means a right subtree follows.
262 if(filehandle.peek()==','){
263 readSpecialChar(filehandle,',',"comma");
265 // ';' means end of tree.
// '[' is accepted as a terminator here as well.
266 else if((ch=filehandle.peek())==';' || ch=='['){
// Right child subtree.
271 rc = readNewickInt(filehandle, n, T);
272 if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
273 if(filehandle.peek() == ')'){
274 readSpecialChar(filehandle,')',"right parenthesis");
278 //note: treeclimber had the code below added - not sure why?
// The remaining lines read a name up to the end of the line and look up its index in T.
280 filehandle.putback(ch);
282 filehandle.get(name, MAX_LINE,'\n');
283 SKIPLINE(filehandle, ch);
285 n = T->getIndex(name);
288 m->mothurOut("Internal error: The only taxon is not taxon 0.\n");
290 readOk = -1; return -1;
// Discard everything up to and including the terminating ';' (or the end of the file).
295 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}
// Node n gets lc and rc as children, branch length 0 and parent -1.
298 T->tree[n].setChildren(lc,rc);
299 T->tree[n].setBranchLength(0);
300 T->tree[n].setParent(-1);
// A child index of -1 means that child is absent, so no parent link is set.
301 if(lc!=-1){ T->tree[lc].setParent(n); }
302 if(rc!=-1){ T->tree[rc].setParent(n); }
305 //T->printTree(); cout << endl;
309 catch(exception& e) {
// Report e through MothurOut, tagged with the class name and function name.
310 m->errorOut(e, "ReadNewickTree", "readTreeString");
314 /**************************************************************************************************/
// Recursively parses one subtree from f into T.
//   f - input stream positioned at the start of a subtree.
//   n - index of the next internal node to fill in; passed by reference and
//       shared across the recursive calls.
//   T - tree being populated.
// Returns -1 on error or when m->control_pressed is set; callers otherwise use
// the result as the index of the subtree's node.
// An internal node may list more than two children; they are folded into a
// chain of two-child nodes. A leaf is looked up by name in T and in the treemap.
316 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
// Abort promptly if the user has interrupted the run.
319 if (m->control_pressed) { return -1; }
321 int c = readNodeChar(f);
325 //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
327 vector<int> childrenNodes;
// Collect every child of this node until the closing ')'.
328 while(f.peek() != ')'){
329 int child = readNewickInt(f, n, T);
330 if (child == -1) { return -1; } //reports an error in reading
331 //cout << "child = " << child << endl;
332 childrenNodes.push_back(child);
334 //after a child you either have , or ), check for both
335 if(f.peek()==')'){ break; }
336 else if (f.peek()==',') { readSpecialChar(f,',',"comma"); }
339 //cout << childrenNodes.size() << endl;
// An internal node needs at least two children.
340 if (childrenNodes.size() < 2) { m->mothurOut("Error in tree, please correct."); m->mothurOutEndLine(); return -1; }
342 //then force into 2 node structure
// NOTE(review): int i is compared against the unsigned childrenNodes.size().
343 for (int i = 1; i < childrenNodes.size(); i++) {
// The first pair joins children 0 and 1. Each later child is joined with node n-1,
// presumably the internal node created on the previous iteration — confirm n is
// incremented once per iteration.
346 if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; }
347 else { lc = n-1; rc = childrenNodes[i]; }
348 //cout << i << '\t' << lc << '\t' << rc << endl;
349 T->tree[n].setChildren(lc,rc);
350 T->tree[lc].setParent(n);
351 T->tree[rc].setParent(n);
353 //T->printTree(); cout << endl;
357 //to account for extra ++ in looping
361 readSpecialChar(f,')',"right parenthesis");
362 //to pass over labels in trees
// NOTE(review): these two lines read from the member filehandle rather than the
// parameter f; the visible callers pass filehandle as f — confirm that always holds.
364 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')&& (c!=')')){ c=filehandle.get(); }
365 filehandle.putback(c);
369 readSpecialChar(f,':',"colon");
// Guard against writing past the nodes allocated in T.
371 if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n"); readOk = -1; return -1; }
// The branch length is read from the stream, or set to 0.0 when none is given.
373 T->tree[n].setBranchLength(readBranchLength(f));
375 T->tree[n].setBranchLength(0.0);
378 //T->tree[n].setChildren(lc,rc);
379 //T->tree[lc].setParent(n);
380 //T->tree[rc].setParent(n);
381 //T->printTree(); cout << endl;
// Leaf case: read the name up to one of the delimiters ':', ',', ')' or a newline.
389 while(d != ':' && d != ',' && d!=')' && d!='\n'){
393 //cout << name << endl;
// A ':' delimiter sets blen, presumably marking that a branch length follows — confirm.
395 if(d == ':') { blen = 1; }
400 string group = globaldata->gTreemap->getGroup(name);
402 //find index in tree of name
403 int n1 = T->getIndex(name);
405 //adds sequence names that are not in group file to the "xxx" group
406 if(group == "not found") {
407 m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n"); //readOk = -1; return n1;
409 globaldata->gTreemap->namesOfSeqs.push_back(name);
410 globaldata->gTreemap->treemap[name].groupname = "xxx";
// Keep the per-group sequence count for "xxx" up to date.
412 map<string, int>::iterator it;
413 it = globaldata->gTreemap->seqsPerGroup.find("xxx");
414 if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
415 globaldata->gTreemap->namesOfGroups.push_back("xxx");
416 globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
// Presumably the else branch: "xxx" already exists, so only the count is increased.
418 globaldata->gTreemap->seqsPerGroup["xxx"]++;
// setGroup() takes a vector of group names; a leaf gets its single group.
424 vector<string> tempGroup; tempGroup.push_back(group);
426 T->tree[n1].setGroup(tempGroup);
// A leaf has no children.
427 T->tree[n1].setChildren(-1,-1);
// The branch length is read from the stream, or set to 0.0 when none is given.
431 T->tree[n1].setBranchLength(readBranchLength(f));
433 T->tree[n1].setBranchLength(0.0);
// Skip ahead to the next ':', ',' or ')'.
// NOTE(review): istream::get() signals end of input with EOF, not 0, so the
// "!= 0" test does not stop this loop on truncated input — confirm.
436 while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') ) {;}
443 catch(exception& e) {
// Report e through MothurOut, tagged with the class name and function name.
444 m->errorOut(e, "ReadNewickTree", "readNewickInt");
448 /**************************************************************************************************/
449 /**************************************************************************************************/