git.donarmstrong.com Git - mothur.git/blobdiff - readtree.cpp
fixed bug in cluster.split with classify method
[mothur.git] / readtree.cpp
index c33617fca57a483da46f179e384497661e205878..c805edc9896bf15b4e7397305c13d8bfbce175d8 100644 (file)
 ReadTree::ReadTree() {
        try {
                globaldata = GlobalData::getInstance();
+               m = MothurOut::getInstance();
                globaldata->gTree.clear();
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTree", "ReadTree");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /***********************************************************************/
 int ReadTree::readSpecialChar(istream& f, char c, string name) {
@@ -32,12 +29,12 @@ int ReadTree::readSpecialChar(istream& f, char c, string name) {
                char d = f.get();
        
                if(d == EOF){
-                       cerr << "Error: Input file ends prematurely, expecting a " << name << "\n";  return -1;
-                       //exit(1);
+                       m->mothurOut("Error: Input file ends prematurely, expecting a " + name + "\n");
+                       exit(1);
                }
                if(d != c){
-                       cerr << "Error: Expected " << name << " in input file.  Found " << d << ".\n";  return -1;
-                       //exit(1);
+                       m->mothurOut("Error: Expected " + name + " in input file.  Found " + toString(d) + ".\n");
+                       exit(1);
                }
                if(d == ')' && f.peek() == '\n'){
                        gobble(f);
@@ -45,13 +42,9 @@ int ReadTree::readSpecialChar(istream& f, char c, string name) {
                return d;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTree", "readSpecialChar");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTree class function readSpecialChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /**************************************************************************************************/
 
@@ -62,19 +55,15 @@ int ReadTree::readNodeChar(istream& f) {
                char d = f.get();
 
                if(d == EOF){
-                       cerr << "Error: Input file ends prematurely, expecting a left parenthesis\n";  return -1;
-                       //exit(1);
+                       m->mothurOut("Error: Input file ends prematurely, expecting a left parenthesis\n");
+                       exit(1);
                }
                return d;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTree", "readNodeChar");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTree class function readNodeChar. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 
 /**************************************************************************************************/
@@ -84,27 +73,21 @@ float ReadTree::readBranchLength(istream& f) {
                float b;
        
                if(!(f >> b)){
-                       cerr << "Error: Missing branch length in input tree.\n";  return -1;
-                       //exit(1);
+                       m->mothurOut("Error: Missing branch length in input tree.\n");
+                       exit(1);
                }
                gobble(f);
                return b;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadTree", "readBranchLength");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadTree class function readBranchLength. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 
-
 /***********************************************************************/
 /***********************************************************************/
 
-
 //Child Classes Below
 
 /***********************************************************************/
@@ -113,14 +96,28 @@ float ReadTree::readBranchLength(istream& f) {
 
 int ReadNewickTree::read() {
        try {
+               holder = "";
                int c, error;
                int comment = 0;
                
                //if you are not a nexus file 
                if ((c = filehandle.peek()) != '#') {  
                        while((c = filehandle.peek()) != EOF) { 
+                               while ((c = filehandle.peek()) != EOF) {
+                                       // get past comments
+                                       if(c == '[') {
+                                               comment = 1;
+                                       }
+                                       if(c == ']'){
+                                               comment = 0;
+                                       }
+                                       if((c == '(') && (comment != 1)){ break; }
+                                       filehandle.get();
+                               }
+
                                //make new tree
                                T = new Tree(); 
+
                                numNodes = T->getNumNodes();
                                numLeaves = T->getNumLeaves();
                                
@@ -164,16 +161,16 @@ int ReadNewickTree::read() {
                                globaldata->gTree.push_back(T); 
                        }
                }
+               
+               if (error != 0) { readOk = error; } 
+               
+               filehandle.close();
                return readOk;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadNewickTree", "read");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadNewickTree class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /**************************************************************************************************/
 //This function read the file through the translation of the sequences names and updates treemap.
@@ -212,13 +209,9 @@ void ReadNewickTree::nexusTranslation() {
                }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadNewickTree", "nexusTranslation");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadNewickTree class function nexus. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 
 /**************************************************************************************************/
@@ -226,7 +219,7 @@ int ReadNewickTree::readTreeString() {
        try {
                
                int n = 0;
-               int lc, rc, error
+               int lc, rc; 
                
                int rooted = 0;
        
@@ -236,11 +229,10 @@ int ReadNewickTree::readTreeString() {
                        n = numLeaves;  //number of leaves / sequences, we want node 1 to start where the leaves left off
 
                        lc = readNewickInt(filehandle, n, T);
-                       if (lc == -1) { return -1; } //reports an error in reading
-               
+                       if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
+       
                        if(filehandle.peek()==','){                                                     
-                               error = readSpecialChar(filehandle,',',"comma");
-                               if (error == -1) { readOk = -1; return -1; }
+                               readSpecialChar(filehandle,',',"comma");
                        }
                        // ';' means end of tree.                                                                                               
                        else if((ch=filehandle.peek())==';' || ch=='['){                
@@ -248,10 +240,9 @@ int ReadNewickTree::readTreeString() {
                        }                                                                                               
                        if(rooted != 1){                                                                
                                rc = readNewickInt(filehandle, n, T);
-                               if (rc == -1) { return -1; } //reports an error in reading
+                               if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
                                if(filehandle.peek() == ')'){                                   
-                                       error = readSpecialChar(filehandle,')',"right parenthesis");
-                                       if (error == -1) { readOk = -1; return -1; }
+                                       readSpecialChar(filehandle,')',"right parenthesis");
                                }                                                                                       
                        }                                                                                               
                }
@@ -265,14 +256,14 @@ int ReadNewickTree::readTreeString() {
                        n = T->getIndex(name);
 
                        if(n!=0){
-                               cerr << "Internal error: The only taxon is not taxon 0.\n";
+                               m->mothurOut("Internal error: The only taxon is not taxon 0.\n");
                                //exit(1);
                                readOk = -1; return -1;
                        }
                        lc = rc = -1;
                } 
                
-               while((ch=filehandle.get())!=';'){;}                                            
+               while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}                                            
                if(rooted != 1){                                                                        
                        T->tree[n].setChildren(lc,rc);
                        T->tree[n].setBranchLength(0);
@@ -284,51 +275,134 @@ int ReadNewickTree::readTreeString() {
        
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadNewickTree", "readTreeString");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadNewickTree class function readTreeString. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
-
 }
 /**************************************************************************************************/
 
 int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
        try {
-               int error;
+               
+               if (m->control_pressed) { return -1; } 
                
                int c = readNodeChar(f);
-               if (c == -1) { readOk = -1; return -1; }
-    
+  string k;
+k = c;
+       cout << "at beginning = " << k <<endl;  
                if(c == '('){
-                       int lc = readNewickInt(f, n, T);
-                       if (lc == -1) { return -1; } //reports an error in reading
-                       error = readSpecialChar(f,',',"comma");
-                       if (error == -1) { readOk = -1; return -1; }
+               
+                       //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
+                       //read all children
+                       vector<int> childrenNodes;
+                       while(f.peek() != ')'){
+                               int child = readNewickInt(f, n, T);
+                               if (child == -1) { return -1; } //reports an error in reading
+                               
+                               childrenNodes.push_back(child);
+                               
+                               //after a child you either have , or ), check for both
+                               if(f.peek()==')'){  break;  }
+                               else if (f.peek()==',') {   readSpecialChar(f,',',"comma");  }
+                               else { string k;
+                       k = f.peek();
+       cout << "in here k = " << k << '\t' << f.tellg() <<endl;
+ }
+                       }
+       cout << childrenNodes.size() << endl;           
+                       if (childrenNodes.size() < 2) {  m->mothurOut("Error in tree, please correct."); m->mothurOutEndLine(); return -1; }
+                       
+                       //then force into 2 node structure
+                       for (int i = 1; i < childrenNodes.size(); i++) {
+                       
+                               int lc, rc;
+                               if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; }
+                               else { lc = n; rc = childrenNodes[i]; }
+                       cout << i << '\t' << lc << '\t' << rc << endl;  
+                               T->tree[n].setChildren(lc,rc);
+                               T->tree[lc].setParent(n);
+                               T->tree[rc].setParent(n);
+                               
+                               T->printTree(); cout << endl;
+                               n++;
+                       }
+                       
+                       //to account for extra ++ in looping
+                       n--;
+                       //int lc = readNewickInt(f, n, T);
+                       //if (lc == -1) { return -1; } //reports an error in reading
+                       
+                       //readSpecialChar(f,',',"comma");
 
-                       int rc = readNewickInt(f, n, T);
-                       if (rc == -1) { return -1; }  //reports an error in reading     
+                       //int rc = readNewickInt(f, n, T);
+                       //if (rc == -1) { return -1; }  //reports an error in reading   
+                       
                        if(f.peek()==')'){      
-                               error = readSpecialChar(f,')',"right parenthesis");     
-                               if (error == -1) { readOk = -1; return -1; }                            
+                               readSpecialChar(f,')',"right parenthesis");     
+                               //to pass over labels in trees
+                               c=filehandle.get();
+                               while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
+                               filehandle.putback(c);
                        }                       
                
                        if(f.peek() == ':'){                                                                          
-                               error = readSpecialChar(f,':',"colon"); 
-                               if (error == -1) { readOk = -1; return -1; }                                            
-                               if(n >= numNodes){      cerr << "Error: Too many nodes in input tree\n";  readOk = -1; return -1; }
-                               error = readBranchLength(f);
-                               if (error == -1) { readOk = -1; return -1; }
-                               T->tree[n].setBranchLength(error);
-                       }else{T->tree[n].setBranchLength(0.0); }                                                
+                               readSpecialChar(f,':',"colon"); 
+                                                                               
+                               if(n >= numNodes){      m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
+                               
+                               T->tree[n].setBranchLength(readBranchLength(f));
+                       }else{
+                               T->tree[n].setBranchLength(0.0); 
+                       }                                               
+                       
+                       //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
+                       /*while(f.peek() == ','){
+                       string k;
+                       k = f.peek();
+       cout << "in here k = " << k << '\t' << f.tellg() <<endl;
+                               //force this node to be left child and read new rc
+                               T->tree[n].setChildren(lc,rc);
+                               T->tree[lc].setParent(n);
+                               T->tree[rc].setParent(n);
+                               
+                               T->printTree(); cout << endl;
+                               lc = n;
+                               n++;
+                               
+                               readSpecialChar(f,',',"comma");
+
+                               rc = readNewickInt(f, n, T);
                
-                       T->tree[n].setChildren(lc,rc);
-                       T->tree[lc].setParent(n);
-                       T->tree[rc].setParent(n);
+                               if (rc == -1) { return -1; }  //reports an error in reading     
+                               
+                               if(f.peek()==')'){      
+                                       readSpecialChar(f,')',"right parenthesis");     
+                                       //to pass over labels in trees
+                                       c=filehandle.get();
+                                       while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
+                                       filehandle.putback(c);
+                                       
+                                       if(f.peek() == ':'){                                                                          
+                                               readSpecialChar(f,':',"colon"); 
+                                       
+                                               if(n >= numNodes){      m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
+                                       
+                                               T->tree[n].setBranchLength(readBranchLength(f));
+                                       }else{
+                                               T->tree[n].setBranchLength(0.0); 
+                                       }                                               
+
+                                       break;
+                               }                       
+                       }*/
                
+                       //T->tree[n].setChildren(lc,rc);
+                       //T->tree[lc].setParent(n);
+                       //T->tree[rc].setParent(n);
+                       //T->printTree();  cout << endl;
+                       
                        return n++;
+               
                }else{
                        f.putback(c);
                        string name = "";
@@ -337,9 +411,9 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                                name += d;
                                d=f.get();
                        }
-               
+               cout << name << endl;
                        int blen = 0;
-                       if(d == ':')    {               blen = 1;                       }               
+                       if(d == ':')    {               blen = 1;       }               
                
                        f.putback(d);
                
@@ -350,37 +424,32 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                        int n1 = T->getIndex(name);
                        
                        //adds sequence names that are not in group file to the "xxx" group
-                       if(n1 == -1) {
-                               cerr << "Name: " << name << " not found in your groupfile. \n"; readOk = -1; return n1;
+                       if(group == "not found") {
+                               m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n");  //readOk = -1; return n1;
                                
-                               //globaldata->gTreemap->namesOfSeqs.push_back(name);
-                               //globaldata->gTreemap->treemap[name].groupname = "xxx";
-                               //globaldata->gTreemap->treemap[name].vectorIndex = (globaldata->gTreemap->namesOfSeqs.size() - 1);
+                               globaldata->gTreemap->namesOfSeqs.push_back(name);
+                               globaldata->gTreemap->treemap[name].groupname = "xxx";
                                
-                               //map<string, int>::iterator it;
-                               //it = globaldata->gTreemap->seqsPerGroup.find("xxx");
-                               //if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
-                               //      globaldata->gTreemap->namesOfGroups.push_back("xxx");
-                               //      globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
-                               //}else {
-                               //      globaldata->gTreemap->seqsPerGroup["xxx"]++;
-                               //}
+                               map<string, int>::iterator it;
+                               it = globaldata->gTreemap->seqsPerGroup.find("xxx");
+                               if (it == globaldata->gTreemap->seqsPerGroup.end()) { //its a new group
+                                       globaldata->gTreemap->namesOfGroups.push_back("xxx");
+                                       globaldata->gTreemap->seqsPerGroup["xxx"] = 1;
+                               }else {
+                                       globaldata->gTreemap->seqsPerGroup["xxx"]++;
+                               }
                                
-                               //find index in tree of name
-                               //n1 = T->getIndex(name);
-                               //group = "xxx";
-                               //numLeaves++;
-                               //numNodes = 2*numLeaves - 1;
+                               group = "xxx";
                        }
                        
-                       T->tree[n1].setGroup(group);
+                       vector<string> tempGroup; tempGroup.push_back(group);
+                       
+                       T->tree[n1].setGroup(tempGroup);
                        T->tree[n1].setChildren(-1,-1);
                
                        if(blen == 1){  
                                f.get();
-                               error = readBranchLength(f);    
-                               if (error == -1) { readOk = -1; return -1; }    
-                               T->tree[n1].setBranchLength(error);
+                               T->tree[n1].setBranchLength(readBranchLength(f));
                        }else{
                                T->tree[n1].setBranchLength(0.0);
                        }
@@ -392,13 +461,9 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                }
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the ReadNewickTree class Function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               m->errorOut(e, "ReadNewickTree", "readNewickInt");
                exit(1);
        }
-       catch(...) {
-               cout << "An unknown error has occurred in the ReadNewickTree class function readNewickInt. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }               
 }
 /**************************************************************************************************/
 /**************************************************************************************************/