X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=readtree.cpp;h=c33617fca57a483da46f179e384497661e205878;hb=9651e8e7172d86707b34af15e95ec60ad4c3c3f9;hp=9b96861a661a6a794120f2d47899222f3664e33b;hpb=60fdefb3a300b59c3bbeffdca8a5f1f30c6a43af;p=mothur.git diff --git a/readtree.cpp b/readtree.cpp index 9b96861..c33617f 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -10,15 +10,10 @@ #include "readtree.h" /***********************************************************************/ -//Parent Class -// The following functions are used by all reading formats. -/***********************************************************************/ -ReadTree::ReadTree() { +ReadTree::ReadTree() { try { - globaldata = GlobalData::getInstance(); - T = new Tree(); - numNodes = T->getNumNodes(); - numLeaves = T->getNumLeaves(); + globaldata = GlobalData::getInstance(); + globaldata->gTree.clear(); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; @@ -32,22 +27,21 @@ ReadTree::ReadTree() { /***********************************************************************/ int ReadTree::readSpecialChar(istream& f, char c, string name) { try { - char d; - while(isspace(d=f.get())) {;} + gobble(f); + char d = f.get(); + if(d == EOF){ - cerr << "Error: Input file ends prematurely, expecting a " << name << "\n"; - exit(1); + cerr << "Error: Input file ends prematurely, expecting a " << name << "\n"; return -1; + //exit(1); } if(d != c){ - cerr << "Error: Expected " << name << " in input file. Found " << d << ".\n"; - exit(1); + cerr << "Error: Expected " << name << " in input file. Found " << d << ".\n"; return -1; + //exit(1); } if(d == ')' && f.peek() == '\n'){ - while(isspace(d=f.get())) {;} - f.putback(d); + gobble(f); } - return d; } catch(exception& e) { @@ -63,11 +57,13 @@ int ReadTree::readSpecialChar(istream& f, char c, string name) { int ReadTree::readNodeChar(istream& f) { try { - char d; - while(isspace(d=f.get())) {;} +// while(isspace(d=f.get())) {;} + gobble(f); + char d = f.get(); + if(d == EOF){ - cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n"; - exit(1); + cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n"; return -1; + //exit(1); } return d; } @@ -88,10 +84,10 @@ float ReadTree::readBranchLength(istream& f) { float b; if(!(f >> b)){ - cerr << "Error: Missing branch length in input tree.\n"; - exit(1); + cerr << "Error: Missing branch length in input tree.\n"; return -1; + //exit(1); } - + gobble(f); return b; } catch(exception& e) { @@ -115,10 +111,122 @@ float ReadTree::readBranchLength(istream& f) { /***********************************************************************/ //This class reads a file in Newick form and stores it in a tree. -void ReadNewickTree::read() { +int ReadNewickTree::read() { + try { + int c, error; + int comment = 0; + + //if you are not a nexus file + if ((c = filehandle.peek()) != '#') { + while((c = filehandle.peek()) != EOF) { + //make new tree + T = new Tree(); + numNodes = T->getNumNodes(); + numLeaves = T->getNumLeaves(); + + error = readTreeString(); + + //save trees for later commands + globaldata->gTree.push_back(T); + gobble(filehandle); + } + //if you are a nexus file + }else if ((c = filehandle.peek()) == '#') { + nexusTranslation(); //reads file through the translation and updates treemap + while((c = filehandle.peek()) != EOF) { + // get past comments + while ((c = filehandle.peek()) != EOF) { + if(holder == "[" || holder == "[!"){ + comment = 1; + } + if(holder == "]"){ + comment = 0; + } + if((holder == "tree" || holder == "end;") && comment != 1){ holder = ""; comment = 0; break;} + filehandle >> holder; + } + + //pass over the "tree rep.6878900 = " + while (((c = filehandle.get()) != '(') && ((c = filehandle.peek()) != EOF) ) {;} + + if (c == EOF ) { break; } + filehandle.putback(c); //put back first ( of tree. + + //make new tree + T = new Tree(); + numNodes = T->getNumNodes(); + numLeaves = T->getNumLeaves(); + + //read tree info + error = readTreeString(); + + //save trees for later commands + globaldata->gTree.push_back(T); + } + } + return readOk; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************************************/ +//This function read the file through the translation of the sequences names and updates treemap. +void ReadNewickTree::nexusTranslation() { + try { + + holder = ""; + int numSeqs = globaldata->gTreemap->getNumSeqs(); //must save this some when we clear old names we can still know how many sequences there were + int comment = 0; + + // get past comments + while(holder != "translate" && holder != "Translate"){ + if(holder == "[" || holder == "[!"){ + comment = 1; + } + if(holder == "]"){ + comment = 0; + } + filehandle >> holder; + if(holder == "tree" && comment != 1){return;} + } + + //update treemap + globaldata->gTreemap->namesOfSeqs.clear(); + for(int i=0;i> number; + filehandle >> name; + name.erase(name.end()-1); //erase the comma + //insert new one with new name + globaldata->gTreemap->treemap[toString(number)].groupname = globaldata->gTreemap->treemap[name].groupname; + globaldata->gTreemap->treemap[toString(number)].vectorIndex = globaldata->gTreemap->treemap[name].vectorIndex; + //erase old one. so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah. + globaldata->gTreemap->treemap.erase(name); + globaldata->gTreemap->namesOfSeqs.push_back(number); + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadNewickTree class function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/**************************************************************************************************/ +int ReadNewickTree::readTreeString() { try { + int n = 0; - int lc, rc; + int lc, rc, error; int rooted = 0; @@ -126,10 +234,13 @@ void ReadNewickTree::read() { if(ch == '('){ n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off + lc = readNewickInt(filehandle, n, T); + if (lc == -1) { return -1; } //reports an error in reading if(filehandle.peek()==','){ - readSpecialChar(filehandle,',',"comma"); + error = readSpecialChar(filehandle,',',"comma"); + if (error == -1) { readOk = -1; return -1; } } // ';' means end of tree. else if((ch=filehandle.peek())==';' || ch=='['){ @@ -137,67 +248,80 @@ void ReadNewickTree::read() { } if(rooted != 1){ rc = readNewickInt(filehandle, n, T); + if (rc == -1) { return -1; } //reports an error in reading if(filehandle.peek() == ')'){ - readSpecialChar(filehandle,')',"right parenthesis"); + error = readSpecialChar(filehandle,')',"right parenthesis"); + if (error == -1) { readOk = -1; return -1; } } } } //note: treeclimber had the code below added - not sure why? - else{ + else{ filehandle.putback(ch); char name[MAX_LINE]; filehandle.get(name, MAX_LINE,'\n'); SKIPLINE(filehandle, ch); - n = T->getIndex(name); + if(n!=0){ cerr << "Internal error: The only taxon is not taxon 0.\n"; - exit(1); + //exit(1); + readOk = -1; return -1; } lc = rc = -1; } while((ch=filehandle.get())!=';'){;} - if(rooted != 1){ + if(rooted != 1){ T->tree[n].setChildren(lc,rc); T->tree[n].setBranchLength(0); T->tree[n].setParent(-1); if(lc!=-1){ T->tree[lc].setParent(n); } if(rc!=-1){ T->tree[rc].setParent(n); } } - - //save tree for later commands - globaldata->gTree = T; + return 0; + } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the ReadNewickTree class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } + } /**************************************************************************************************/ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { try { + int error; + int c = readNodeChar(f); + if (c == -1) { readOk = -1; return -1; } if(c == '('){ int lc = readNewickInt(f, n, T); - readSpecialChar(f,',',"comma"); - - int rc = readNewickInt(f, n, T); + if (lc == -1) { return -1; } //reports an error in reading + error = readSpecialChar(f,',',"comma"); + if (error == -1) { readOk = -1; return -1; } + + int rc = readNewickInt(f, n, T); + if (rc == -1) { return -1; } //reports an error in reading if(f.peek()==')'){ - readSpecialChar(f,')',"right parenthesis"); + error = readSpecialChar(f,')',"right parenthesis"); + if (error == -1) { readOk = -1; return -1; } } if(f.peek() == ':'){ - readSpecialChar(f,':',"colon"); - if(n >= numNodes){ cerr << "Error: Too many nodes in input tree\n"; exit(1); } - T->tree[n].setBranchLength(readBranchLength(f)); + error = readSpecialChar(f,':',"colon"); + if (error == -1) { readOk = -1; return -1; } + if(n >= numNodes){ cerr << "Error: Too many nodes in input tree\n"; readOk = -1; return -1; } + error = readBranchLength(f); + if (error == -1) { readOk = -1; return -1; } + T->tree[n].setBranchLength(error); }else{T->tree[n].setBranchLength(0.0); } T->tree[n].setChildren(lc,rc); @@ -225,15 +349,38 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { //find index in tree of name int n1 = T->getIndex(name); - if(n1 == -1){cerr << "Name: " << name << " not found\n"; exit(1);} + //adds sequence names that are not in group file to the "xxx" group + if(n1 == -1) { + cerr << "Name: " << name << " not found in your groupfile. \n"; readOk = -1; return n1; + + //globaldata->gTreemap->namesOfSeqs.push_back(name); + //globaldata->gTreemap->treemap[name].groupname = "xxx"; + //globaldata->gTreemap->treemap[name].vectorIndex = (globaldata->gTreemap->namesOfSeqs.size() - 1); + + //map::iterator it; + //it = globaldata->gTreemap->seqsPerGroup.find("xxx"); + //if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group + // globaldata->gTreemap->namesOfGroups.push_back("xxx"); + // globaldata->gTreemap->seqsPerGroup["xxx"] = 1; + //}else { + // globaldata->gTreemap->seqsPerGroup["xxx"]++; + //} + + //find index in tree of name + //n1 = T->getIndex(name); + //group = "xxx"; + //numLeaves++; + //numNodes = 2*numLeaves - 1; + } - else T->tree[n1].setGroup(group); - + T->tree[n1].setGroup(group); T->tree[n1].setChildren(-1,-1); if(blen == 1){ - f.get(); - T->tree[n1].setBranchLength(readBranchLength(f)); + f.get(); + error = readBranchLength(f); + if (error == -1) { readOk = -1; return -1; } + T->tree[n1].setBranchLength(error); }else{ T->tree[n1].setBranchLength(0.0); } @@ -255,4 +402,4 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) { } /**************************************************************************************************/ /**************************************************************************************************/ - \ No newline at end of file +