5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /***********************************************************************/
// Constructor: stores the object returned by MothurOut::getInstance() in m, which the rest of this
// file uses for logging (mothurOut / errorOut) and for the control_pressed flag.
// NOTE(review): this listing skips source lines (13 -> 15 -> 18); the errorOut call below uses an
// `e` that is not declared in the visible lines -- presumably it sits inside a catch clause; confirm.
13 ReadTree::ReadTree() {
15 m = MothurOut::getInstance();
18 m->errorOut(e, "ReadTree", "ReadTree");
22 /***********************************************************************/
// Calls assembleTree() on every element of Trees, in index order.
// Returns 0 as soon as m->control_pressed is found set. The return statement for the normal path
// is on a line not shown in this listing.
// NOTE(review): the loop index is an int compared against Trees.size(); if Trees is a standard
// container this is a signed/unsigned comparison -- confirm the declared type of Trees.
23 int ReadTree::AssembleTrees() {
25 //assemble the user's trees
26 for (int i = 0; i < Trees.size(); i++) {
// bail out before touching the next tree when control_pressed is set (presumably a user abort -- confirm)
27 if (m->control_pressed) { return 0; }
28 Trees[i]->assembleTree();
// `e` is not declared in the visible lines; presumably this call lives in a catch block -- confirm
33 m->errorOut(e, "ReadTree", "AssembleTrees");
37 /***********************************************************************/
// Reads a punctuation character from the tree stream and reports problems through m->mothurOut.
//   f    - input stream being parsed
//   c    - NOTE(review): presumably the character that is expected next; the line that compares
//          against it is not part of this listing -- confirm
//   name - human-readable name of the expected character; it is spliced into both error messages below
// The visible lines show two diagnostics: the stream ending early, and a different character (d)
// being found. The values returned in each case are on lines not shown here.
38 int ReadTree::readSpecialChar(istream& f, char c, string name) {
45 m->mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
49 m->mothurOut("Error: Expected " + name + " in input file. Found " + toString(d) + ".\n");
// special case: a ')' that is immediately followed by a newline; the action taken is not visible here
52 if(d == ')' && f.peek() == '\n'){
58 m->errorOut(e, "ReadTree", "readSpecialChar");
62 /**************************************************************************************************/
// Reads a character from f at the point where a node is expected and returns an int.
// NOTE(review): the read itself and the return statement are on lines not shown in this listing.
// The only visible diagnostic is the "ends prematurely, expecting a left parenthesis" message.
64 int ReadTree::readNodeChar(istream& f) {
// disabled whitespace-skipping loop, kept from the original source
66 // while(isspace(d=f.get())) {;}
71 m->mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
77 m->errorOut(e, "ReadTree", "readNodeChar");
82 /**************************************************************************************************/
// Reads a branch length from f and returns it as a float.
// NOTE(review): the extraction and the condition guarding the "Missing branch length" message are
// on lines not shown in this listing; what is returned after that message cannot be told from here.
84 float ReadTree::readBranchLength(istream& f) {
89 m->mothurOut("Error: Missing branch length in input tree.\n");
96 m->errorOut(e, "ReadTree", "readBranchLength");
101 /***********************************************************************/
102 /***********************************************************************/
104 //Child Classes Below
106 /***********************************************************************/
107 /***********************************************************************/
108 //This class reads a file in Newick form and stores it in a tree.
// Reads the trees found in filehandle, using tmap while parsing.
// The first character decides the format: '#' selects the nexus branch, anything else the plain
// Newick branch. In both branches the code scans forward to the next tree, copies the node and leaf
// counts out of T, and hands the actual parsing to readTreeString(tmap).
// NOTE(review): this listing skips many source lines -- the declarations of c, comment, holder and
// error, the creation of T, the storing of finished trees and the return statement are not visible.
110 int ReadNewickTree::read(TreeMap* tmap) {
116 //if you are not a nexus file
117 if ((c = filehandle.peek()) != '#') {
// outer loop: one pass per tree until the stream is exhausted
118 while((c = filehandle.peek()) != EOF) {
// inner loop: advance to the start of the next tree
119 while ((c = filehandle.peek()) != EOF) {
// a '(' seen while not inside a comment (comment != 1) marks the start of a tree
127 if((c == '(') && (comment != 1)){ break; }
134 numNodes = T->getNumNodes();
135 numLeaves = T->getNumLeaves();
137 error = readTreeString(tmap);
139 //save trees for later commands
141 m->gobble(filehandle);
143 //if you are a nexus file
// NOTE(review): this re-tests the same peek as line 117, so the condition is the exact complement of the first branch
144 }else if ((c = filehandle.peek()) == '#') {
145 //get right number of seqs from nexus file.
// a Tree is constructed and destroyed immediately; presumably only the constructor's side effects are wanted -- confirm
146 Tree* temp = new Tree(tmap); delete temp;
148 nexusTranslation(tmap); //reads file through the translation and updates treemap
149 while((c = filehandle.peek()) != EOF) {
151 while ((c = filehandle.peek()) != EOF) {
// "[" or "[!" tokens are tested here; presumably they open a nexus comment (the handling is not visible)
152 if(holder == "[" || holder == "[!"){
// a "tree" or "end;" token outside a comment ends the scan; holder and comment are reset first
158 if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
159 filehandle >> holder;
162 //pass over the "tree rep.6878900 = "
// consume characters until a '(' has been read or the next character is EOF; on exit c holds '(' or EOF
163 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
165 if (c == EOF ) { break; }
166 filehandle.putback(c); //put back first ( of tree.
170 numNodes = T->getNumNodes();
171 numLeaves = T->getNumLeaves();
174 error = readTreeString(tmap);
176 //save trees for later commands
// a non-zero result from readTreeString is recorded in readOk
181 if (error != 0) { readOk = error; }
187 catch(exception& e) {
188 m->errorOut(e, "ReadNewickTree", "read");
192 /**************************************************************************************************/
193 //This function reads the file through the translation of the sequence names and updates the treemap.
// Advances filehandle to the nexus "translate" section and re-keys tmap so that each sequence is
// stored under the token read into `number` instead of under its original name.
// Returns holder early (the string "tree") if a "tree" token is met outside a comment before any
// "translate"/"Translate" token; the normal-path return value is on a line not shown here.
// NOTE(review): this listing skips source lines. Both a while loop (source line 218) and a for loop
// (source line 235) over the translation entries are visible; the missing lines may show that one
// of them is disabled -- confirm against the full file.
194 string ReadNewickTree::nexusTranslation(TreeMap* tmap) {
198 int numSeqs = m->Treenames.size(); //must save this so that when we clear the old names we still know how many sequences there were
// read whitespace-separated tokens until the translate keyword is found
202 while(holder != "translate" && holder != "Translate"){
// "[" or "[!" tokens are tested here; presumably they open a nexus comment (the handling is not visible)
203 if(holder == "[" || holder == "[!"){
209 filehandle >> holder;
210 if(holder == "tree" && comment != 1){return holder;}
// the old name list is emptied here; entries are pushed back under their new keys further down
214 tmap->namesOfSeqs.clear();
218 while ((c = filehandle.peek()) != EOF) {
220 filehandle >> number;
// a "tree" or ";" token ends the translation table
222 if ((number == "tree") || (number == ";") ) { name = number; break; }
// remember the last character of name so a trailing comma can be detected; skipped for an empty name
227 if (name.length() != 0) { lastChar = name[name.length()-1]; }
229 if ((name == "tree") || (name == ";") ) { break; }
231 if (lastChar == ',') { name.erase(name.end()-1); } //erase the comma
235 for(int i=0;i<numSeqs;i++){
237 filehandle >> number;
// NOTE(review): unlike source line 231, no check for a trailing comma or an empty name is visible before this erase
239 name.erase(name.end()-1); //erase the comma
241 //insert new one with new name
242 string group = tmap->getGroup(name);
243 tmap->treemap[toString(number)].groupname = group;
244 tmap->treemap[toString(number)].vectorIndex = tmap->getIndex(name);
245 //erase old one. so the entry treemap[sarah] is now treemap[1], if number is 1 and name is sarah.
246 tmap->treemap.erase(name);
247 tmap->namesOfSeqs.push_back(number);
252 catch(exception& e) {
253 m->errorOut(e, "ReadNewickTree", "nexusTranslation");
258 /**************************************************************************************************/
// Parses one tree from filehandle into T.
// Visible flow: read the left subtree with readNewickInt, then a comma and the right subtree when
// a comma follows, and finally set up node n with children (lc, rc), branch length 0 and parent -1.
// Returns -1 when either readNewickInt call returns -1, and on the "only taxon is not taxon 0" error
// (which also sets readOk to -1). The success return value is on a line not shown here.
// NOTE(review): this listing skips source lines; the declarations of n, lc, rc and name, and the
// conditions guarding several branches below, are not visible.
259 int ReadNewickTree::readTreeString(TreeMap* tmap) {
267 int ch = filehandle.peek();
270 n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off
272 lc = readNewickInt(filehandle, n, T, tmap);
273 if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
275 if(filehandle.peek()==','){
276 readSpecialChar(filehandle,',',"comma");
278 // ';' means end of tree.
// a '[' is accepted here as well as ';'
279 else if((ch=filehandle.peek())==';' || ch=='['){
284 rc = readNewickInt(filehandle, n, T, tmap);
285 if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
286 if(filehandle.peek() == ')'){
287 readSpecialChar(filehandle,')',"right parenthesis");
291 //note: treeclimber had the code below added - not sure why?
// presumably the single-taxon path: ch is pushed back, the rest of the line is read as the name and
// looked up in T -- confirm against the missing lines
293 filehandle.putback(ch);
295 filehandle.get(name, MAX_LINE,'\n');
296 SKIPLINE(filehandle, ch);
298 n = T->getIndex(name);
301 m->mothurOut("Internal error: The only taxon is not taxon 0.\n");
303 readOk = -1; return -1;
// discard everything up to and including the terminating ';', or stop at end of file
308 while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}
// node n gets the two subtrees as children, a zero branch length and no parent (-1)
311 T->tree[n].setChildren(lc,rc);
312 T->tree[n].setBranchLength(0);
313 T->tree[n].setParent(-1);
// only real child indices (not -1) are linked back to n
314 if(lc!=-1){ T->tree[lc].setParent(n); }
315 if(rc!=-1){ T->tree[rc].setParent(n); }
318 //T->printTree(); cout << endl;
322 catch(exception& e) {
323 m->errorOut(e, "ReadNewickTree", "readTreeString");
327 /**************************************************************************************************/
// Recursively parses one node of a Newick tree from f and records it in T.
//   f    - stream being parsed
//   n    - index of the next internal node to fill in T->tree; taken by reference so it is shared
//          across the recursion
//   T    - tree whose tree[] entries are filled in
//   tmap - map used to look up, and where necessary add, the group of each leaf name
// Returns -1 when m->control_pressed is set, when a recursive call fails, when a parenthesised node
// has fewer than two children, or when n reaches numNodes (readOk is also set to -1 in that case).
// NOTE(review): this listing skips source lines; the branch that separates the internal-node path
// from the leaf path, the increments of n, the declarations of lc, rc, d, name and blen, and the
// success return values are not visible.
329 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
332 if (m->control_pressed) { return -1; }
334 int c = readNodeChar(f);
338 //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
// collect the index of every child up to the matching ')'
340 vector<int> childrenNodes;
341 while(f.peek() != ')'){
342 int child = readNewickInt(f, n, T, tmap);
343 if (child == -1) { return -1; } //reports an error in reading
344 //cout << "child = " << child << endl;
345 childrenNodes.push_back(child);
347 //after a child you either have , or ), check for both
348 if(f.peek()==')'){ break; }
349 else if (f.peek()==',') { readSpecialChar(f,',',"comma"); }
352 //cout << childrenNodes.size() << endl;
353 if (childrenNodes.size() < 2) { m->mothurOut("Error in tree, please correct."); m->mothurOutEndLine(); return -1; }
355 //then force into 2 node structure
// the first pair of children goes under node n; each further child is paired with node n-1
// NOTE(review): n-1 only names the previously built node if n is advanced between iterations;
// that increment is on a line not shown here -- confirm
356 for (int i = 1; i < childrenNodes.size(); i++) {
359 if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; }
360 else { lc = n-1; rc = childrenNodes[i]; }
361 //cout << i << '\t' << lc << '\t' << rc << endl;
362 T->tree[n].setChildren(lc,rc);
363 T->tree[lc].setParent(n);
364 T->tree[rc].setParent(n);
366 //T->printTree(); cout << endl;
370 //to account for extra ++ in looping
374 readSpecialChar(f,')',"right parenthesis");
375 //to pass over labels in trees
// NOTE(review): these two lines use the member filehandle instead of the parameter f; the call in
// readTreeString passes filehandle as f, so the two are the same stream on that path
377 while((c!=',') && (c != -1) && (c!= ':') && (c!=';')&& (c!=')')){ c=filehandle.get(); }
378 filehandle.putback(c);
382 readSpecialChar(f,':',"colon");
384 if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n"); readOk = -1; return -1; }
// branch length: parsed from the stream on one path, 0.0 on the other (the condition is not visible)
386 T->tree[n].setBranchLength(readBranchLength(f));
388 T->tree[n].setBranchLength(0.0);
391 //T->tree[n].setChildren(lc,rc);
392 //T->tree[lc].setParent(n);
393 //T->tree[rc].setParent(n);
394 //T->printTree(); cout << endl;
// presumably the leaf path: scan up to ':', ',', ')' or a newline (the loop body is not visible)
402 while(d != ':' && d != ',' && d!=')' && d!='\n'){
406 //cout << name << endl;
// a ':' terminator sets blen to 1, meaning a branch length follows
408 if(d == ':') { blen = 1; }
413 string group = tmap->getGroup(name);
415 //find index in tree of name
416 int n1 = T->getIndex(name);
418 //adds sequence names that are not in group file to the "xxx" group
// NOTE(review): the message says the name "will be disregarded", yet the visible lines add it to
// tmap under group "xxx" -- confirm which behaviour is intended
419 if(group == "not found") {
420 m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n"); //readOk = -1; return n1;
422 tmap->namesOfSeqs.push_back(name);
423 tmap->treemap[name].groupname = "xxx";
// keep the per-group sequence count for "xxx" up to date, creating the group on first use
425 map<string, int>::iterator it;
426 it = tmap->seqsPerGroup.find("xxx");
427 if (it == tmap->seqsPerGroup.end()) { //its a new group
428 tmap->addGroup("xxx");
429 tmap->seqsPerGroup["xxx"] = 1;
431 tmap->seqsPerGroup["xxx"]++;
// the group is stored as a one-element vector and the node is given no children (-1,-1)
437 vector<string> tempGroup; tempGroup.push_back(group);
439 T->tree[n1].setGroup(tempGroup);
440 T->tree[n1].setChildren(-1,-1);
// branch length: parsed from the stream on one path, 0.0 on the other (presumably chosen by blen -- confirm)
444 T->tree[n1].setBranchLength(readBranchLength(f));
446 T->tree[n1].setBranchLength(0.0);
// NOTE(review): this loop stops on 0, ':', ',' or ')', not on EOF; istream::get() returns EOF at end
// of input, so confirm that it cannot spin once the stream is exhausted
449 while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') ) {;}
456 catch(exception& e) {
457 m->errorOut(e, "ReadNewickTree", "readNewickInt");
461 /**************************************************************************************************/
462 /**************************************************************************************************/