5 * Created by Sarah Westcott on 1/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
12 /***********************************************************************/
// Constructor. Stores the GlobalData instance returned by
// GlobalData::getInstance() and empties its gTree container
// (ReadNewickTree::read() later appends the trees it parses to gTree).
13 ReadTree::ReadTree() {
15 globaldata = GlobalData::getInstance();
16 globaldata->gTree.clear();
// Diagnostic written to cout that includes the exception text from e.what().
19 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic written to cout for a failure that carries no exception text.
23 cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
27 /***********************************************************************/
// Checks the next delimiter character of the tree being parsed.
//   f    - input stream being parsed.
//   c    - presumably the character that is expected next (TODO confirm).
//   name - human-readable name of that character, used in the error text.
// Returns -1 after writing a message to cerr when the input ends early or when
// a different character (d) is found. Callers in this file treat -1 as failure.
28 int ReadTree::readSpecialChar(istream& f, char c, string name) {
// Failure: the stream ended before the expected character was seen.
35 cerr << "Error: Input file ends prematurely, expecting a " << name << "\n"; return -1;
// Failure: some other character, d, was found instead.
39 cerr << "Error: Expected " << name << " in input file. Found " << d << ".\n"; return -1;
// Special case: a ')' that is immediately followed by a newline.
42 if(d == ')' && f.peek() == '\n'){
48 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
52 cout << "An unknown error has occurred in the ReadTree class function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
56 /**************************************************************************************************/
// Reads the character that readNewickInt() uses to decide how to parse a node.
//   f - input stream being parsed.
// Returns -1 after writing a message to cerr when the input ends prematurely;
// presumably returns the character that was read otherwise (TODO confirm).
58 int ReadTree::readNodeChar(istream& f) {
// Disabled whitespace-skipping loop, kept for reference.
60 // while(isspace(d=f.get())) {;}
65 cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n"; return -1;
71 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
75 cout << "An unknown error has occurred in the ReadTree class function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
80 /**************************************************************************************************/
// Reads a branch length from f.
//   f - input stream being parsed.
// Returns -1 after writing a message to cerr when no branch length is present.
// NOTE(review): callers compare the result with -1 to detect failure, so a
// branch length of exactly -1 in the input cannot be told apart from an error.
82 float ReadTree::readBranchLength(istream& f) {
87 cerr << "Error: Missing branch length in input tree.\n"; return -1;
94 cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
98 cout << "An unknown error has occurred in the ReadTree class function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
104 /***********************************************************************/
105 /***********************************************************************/
108 //Child Classes Below
110 /***********************************************************************/
111 /***********************************************************************/
112 //This class reads a file in Newick form and stores it in a tree.
// Reads the trees found in filehandle and appends each one to
// globaldata->gTree. A file whose first character is '#' is handled as a
// Nexus file; anything else is read as a sequence of plain Newick trees.
114 int ReadNewickTree::read() {
119 //if you are not a nexus file
120 if ((c = filehandle.peek()) != '#') {
// Plain Newick input: keep reading trees until the stream reports EOF.
121 while((c = filehandle.peek()) != EOF) {
// Cache the node and leaf counts of T; readTreeString() and readNewickInt()
// use them as the starting index and the upper bound for internal nodes.
124 numNodes = T->getNumNodes();
125 numLeaves = T->getNumLeaves();
// Parse one tree from filehandle into T.
127 error = readTreeString();
129 //save trees for later commands
130 globaldata->gTree.push_back(T);
133 //if you are a nexus file
// NOTE(review): this repeats the peek already made by the first branch, so a
// plain else looks equivalent - confirm.
134 }else if ((c = filehandle.peek()) == '#') {
135 nexusTranslation(); //reads file through the translation and updates treemap
136 while((c = filehandle.peek()) != EOF) {
// Scan tokens until a "tree" or "end;" keyword is found outside a comment.
138 while ((c = filehandle.peek()) != EOF) {
// "[" or "[!" appears to mark the start of a bracketed comment (tracked by the comment flag).
139 if(holder == "[" || holder == "[!"){
145 if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;}
146 filehandle >> holder;
149 //pass over the "tree rep.6878900 = "
// The loop stops with c == '(' once the opening parenthesis has been consumed,
// or with c == EOF when the next character to be read is end-of-file.
150 while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;}
152 if (c == EOF ) { break; }
153 filehandle.putback(c); //put back first ( of tree.
// Same per-tree steps as the plain Newick branch above.
157 numNodes = T->getNumNodes();
158 numLeaves = T->getNumLeaves();
161 error = readTreeString();
163 //save trees for later commands
164 globaldata->gTree.push_back(T);
// Exceptions derived from std::exception are reported on cout with their what() text.
169 catch(exception& e) {
170 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
174 cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
178 /**************************************************************************************************/
179 //This function reads the file through the translation of the sequence names and updates treemap.
// Advances filehandle to the "translate" section of a Nexus file and re-keys
// globaldata->gTreemap so that each sequence is stored under the number given
// in that section instead of under its original name.
180 void ReadNewickTree::nexusTranslation() {
184 int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this so that when we clear the old names we still know how many sequences there were
// Skip tokens until the translate keyword is reached.
188 while(holder != "translate" && holder != "Translate"){
// "[" or "[!" appears to mark the start of a bracketed comment (tracked by the comment flag).
189 if(holder == "[" || holder == "[!"){
195 filehandle >> holder;
// A "tree" keyword outside a comment means the trees start before any
// translate keyword was seen, so return and leave the treemap untouched.
196 if(holder == "tree" && comment != 1){return;}
// Rebuild namesOfSeqs from the translation entries read below.
200 globaldata->gTreemap->namesOfSeqs.clear();
201 for(int i=0;i<numSeqs;i++){
203 filehandle >> number;
// NOTE(review): assumes name is non-empty and ends with a separator character;
// erasing end()-1 on an empty string is undefined behaviour - confirm how name is read.
205 name.erase(name.end()-1); //erase the comma
206 //insert new one with new name
207 globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname;
208 globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex;
209 //erase old one. so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
210 globaldata->gTreemap->treemap.erase(name);
211 globaldata->gTreemap->namesOfSeqs.push_back(number);
// Note: the messages below name this function "nexus" rather than nexusTranslation.
214 catch(exception& e) {
215 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
219 cout << "An unknown error has occurred in the ReadNewickTree class function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
224 /**************************************************************************************************/
// Parses one tree from filehandle into T and fills in the root node.
// Returns -1 when a sub-step reports an error; readOk is also set to -1 at the
// failure sites that follow readSpecialChar() and the single-taxon check.
225 int ReadNewickTree::readTreeString() {
233 int ch = filehandle.peek();
236 n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off
// Left subtree of the root; the returned value is used as its index in T->tree.
238 lc = readNewickInt(filehandle, n, T);
239 if (lc == -1) { return -1; } //reports an error in reading
// A comma after the left subtree is consumed here.
241 if(filehandle.peek()==','){
242 error = readSpecialChar(filehandle,',',"comma");
243 if (error == -1) { readOk = -1; return -1; }
245 // ';' means end of tree.
246 else if((ch=filehandle.peek())==';' || ch=='['){
// Right subtree of the root.
250 rc = readNewickInt(filehandle, n, T);
251 if (rc == -1) { return -1; } //reports an error in reading
// Consume the closing parenthesis of the root when it is the next character.
252 if(filehandle.peek() == ')'){
253 error = readSpecialChar(filehandle,')',"right parenthesis");
254 if (error == -1) { readOk = -1; return -1; }
258 //note: treeclimber had the code below added - not sure why?
// Path for input that holds a single taxon name: the rest of the line is read
// as the name and looked up in T.
260 filehandle.putback(ch);
262 filehandle.get(name, MAX_LINE,'\n');
263 SKIPLINE(filehandle, ch);
265 n = T->getIndex(name);
268 cerr << "Internal error: The only taxon is not taxon 0.\n";
270 readOk = -1; return -1;
// Discard everything up to and including the terminating ';'.
// NOTE(review): there is no EOF test, so this does not terminate if the input
// ends before a ';' is seen - confirm inputs are always terminated.
275 while((ch=filehandle.get())!=';'){;}
// Fill in the root: node n gets lc and rc as children, branch length 0 and no
// parent (-1); the children, when present, are pointed back at n.
277 T->tree[n].setChildren(lc,rc);
278 T->tree[n].setBranchLength(0);
279 T->tree[n].setParent(-1);
280 if(lc!=-1){ T->tree[lc].setParent(n); }
281 if(rc!=-1){ T->tree[rc].setParent(n); }
286 catch(exception& e) {
287 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
291 cout << "An unknown error has occurred in the ReadNewickTree class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
296 /**************************************************************************************************/
// Recursively parses one Newick node from f into T.
//   f - input stream being parsed.
//   n - index in T->tree of the internal node being filled in (by reference).
//   T - tree being populated.
// Returns -1 on most failures, usually with readOk set to -1 as well. Callers
// use a non-negative result as the parsed node's index in T->tree.
298 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
302 int c = readNodeChar(f);
303 if (c == -1) { readOk = -1; return -1; }
// Internal node: left child, comma, right child, closing parenthesis.
306 int lc = readNewickInt(f, n, T);
307 if (lc == -1) { return -1; } //reports an error in reading
308 error = readSpecialChar(f,',',"comma");
309 if (error == -1) { readOk = -1; return -1; }
311 int rc = readNewickInt(f, n, T);
312 if (rc == -1) { return -1; } //reports an error in reading
314 error = readSpecialChar(f,')',"right parenthesis");
315 if (error == -1) { readOk = -1; return -1; }
// Branch length of the internal node, introduced by a colon.
319 error = readSpecialChar(f,':',"colon");
320 if (error == -1) { readOk = -1; return -1; }
// n must stay below numNodes, the node count taken from T in read().
321 if(n >= numNodes){ cerr << "Error: Too many nodes in input tree\n"; readOk = -1; return -1; }
// NOTE(review): readBranchLength() returns float; if error is declared as an
// integer the branch length is truncated before it is stored - confirm.
322 error = readBranchLength(f);
323 if (error == -1) { readOk = -1; return -1; }
324 T->tree[n].setBranchLength(error);
325 }else{T->tree[n].setBranchLength(0.0); }
// Link node n to its two children and point both children back at n.
327 T->tree[n].setChildren(lc,rc);
328 T->tree[lc].setParent(n);
329 T->tree[rc].setParent(n);
// Leaf node: the name runs until ':', ',', ')' or a newline.
336 while(d != ':' && d != ',' && d!=')' && d!='\n'){
// blen records that the name was terminated by ':'; it presumably selects
// between the two branch-length assignments below (TODO confirm).
342 if(d == ':') { blen = 1; }
347 string group = globaldata->gTreemap->getGroup(name);
349 //find index in tree of name
350 int n1 = T->getIndex(name);
// NOTE(review): the comment below describes the disabled "xxx" fallback that
// follows; the active code reports the missing name and returns n1 instead.
352 //adds sequence names that are not in group file to the "xxx" group
354 cerr << "Name: " << name << " not found in your groupfile. \n"; readOk = -1; return n1;
356 //globaldata->gTreemap->namesOfSeqs.push_back(name);
357 //globaldata->gTreemap->treemap[name].groupname = "xxx";
358 //globaldata->gTreemap->treemap[name].vectorIndex = (globaldata->gTreemap->namesOfSeqs.size() - 1);
360 //map<string, int>::iterator it;
361 //it = globaldata->gTreemap->seqsPerGroup.find("xxx");
362 //if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
363 // globaldata->gTreemap->namesOfGroups.push_back("xxx");
364 // globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
366 // globaldata->gTreemap->seqsPerGroup["xxx"]++;
369 //find index in tree of name
370 //n1 = T->getIndex(name);
373 //numNodes = 2*numLeaves - 1;
// Record the leaf's group and mark it as having no children.
376 T->tree[n1].setGroup(group);
377 T->tree[n1].setChildren(-1,-1);
// NOTE(review): same float-to-error assignment as for internal nodes above.
381 error = readBranchLength(f);
382 if (error == -1) { readOk = -1; return -1; }
383 T->tree[n1].setBranchLength(error);
385 T->tree[n1].setBranchLength(0.0);
// Skip ahead to the next ':', ',' or ')'.
// NOTE(review): the loop tests for 0, not EOF; istream::get() returns EOF at
// end of input, so this may not terminate on truncated input - confirm.
388 while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') ) {;}
394 catch(exception& e) {
395 cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
399 cout << "An unknown error has occurred in the ReadNewickTree class function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
403 /**************************************************************************************************/
404 /**************************************************************************************************/