]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed read.tree so that it can read trees generated by fasttree
authorwestcott <westcott>
Mon, 14 Jun 2010 17:06:03 +0000 (17:06 +0000)
committerwestcott <westcott>
Mon, 14 Jun 2010 17:06:03 +0000 (17:06 +0000)
getseqscommand.cpp
getseqscommand.h
readtree.cpp
readtreecommand.cpp
removeseqscommand.cpp
tree.cpp
tree.h

index a9463a076cfee738f5171575325a6a92881744d9..8bf93ce68db55f9b380307feb73d83ecf3700c5d 100644 (file)
@@ -130,9 +130,15 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        taxfile = validParameter.validFile(parameters, "taxonomy", true);
                        if (taxfile == "not open") { abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
-
+                       
+                       string usedDups = "true";
+                       string temp = validParameter.validFile(parameters, "dups", false);      if (temp == "not found") { temp = "false"; usedDups = ""; }
+                       dups = isTrue(temp);
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+               
+                       if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
+
                }
 
        }
@@ -147,7 +153,8 @@ void GetSeqsCommand::help(){
        try {
                m->mothurOut("The get.seqs command reads an .accnos file and any of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
                m->mothurOut("It outputs a file containing only the sequences in the .accnos file.\n");
-               m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy and alignreport.  You must provide accnos and at least one of the other parameters.\n");
+               m->mothurOut("The get.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide accnos and at least one of the other parameters.\n");
+               m->mothurOut("The dups parameter allows you to add the entire line from a name file if you add any name from the line. default=false. \n");
                m->mothurOut("The get.seqs command should be in the following format: get.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
                m->mothurOut("Example get.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
@@ -171,8 +178,8 @@ int GetSeqsCommand::execute(){
                if (m->control_pressed) { return 0; }
                
                //read through the correct file and output lines you want to keep
-               if (fastafile != "")            {               readFasta();    }
                if (namefile != "")                     {               readName();             }
+               if (fastafile != "")            {               readFasta();    }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
@@ -220,7 +227,7 @@ int GetSeqsCommand::readFasta(){
                        
                        if (name != "") {
                                //if this name is in the accnos file
-                               if (names.count(name) == 1) {
+                               if (names.count(name) != 0) {
                                        wroteSomething = true;
                                        
                                        currSeq.printSequence(out);
@@ -280,11 +287,11 @@ int GetSeqsCommand::readList(){
                                        binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
                                        
                                        //if that name is in the .accnos file, add it
-                                       if (names.count(name) == 1) {  newNames += name + ",";  }
+                                       if (names.count(name) != 0) {  newNames += name + ",";  }
                                }
                        
                                //get last name
-                               if (names.count(binnames) == 1) {  newNames += binnames + ",";  }
+                               if (names.count(binnames) != 0) {  newNames += binnames + ",";  }
 
                                //if there are names in this bin add to new list
                                if (newNames != "") { 
@@ -338,7 +345,10 @@ int GetSeqsCommand::readName(){
                        if (m->control_pressed) { in.close(); out.close(); remove(outputFileName.c_str());  return 0; }
 
                        in >> firstCol;                         
-                       in >> secondCol;                        
+                       in >> secondCol;
+                       
+                       string hold = "";
+                       if (dups) { hold = secondCol; }
                        
                        vector<string> parsedNames;
                        //parse second column saving each name
@@ -353,39 +363,43 @@ int GetSeqsCommand::readName(){
                        
                        vector<string> validSecond;
                        for (int i = 0; i < parsedNames.size(); i++) {
-                               if (names.count(parsedNames[i]) == 1) {
+                               if (names.count(parsedNames[i]) != 0) {
                                        validSecond.push_back(parsedNames[i]);
                                }
                        }
 
-                       
-                       //if the name in the first column is in the set then print it and any other names in second column also in set
-                       if (names.count(firstCol) == 1) {
-                       
+                       if ((dups) && (validSecond.size() != 0)) { //dups = true and we want to add someone, then add everyone
+                               for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
+                               out << firstCol << '\t' << hold << endl;
                                wroteSomething = true;
-                               
-                               out << firstCol << '\t';
-                               
-                               //you know you have at least one valid second since first column is valid
-                               for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
-                               out << validSecond[validSecond.size()-1] << endl;
-                               
-                       
-                       //make first name in set you come to first column and then add the remaining names to second column
                        }else {
-                               //you want part of this row
-                               if (validSecond.size() != 0) {
+                               //if the name in the first column is in the set then print it and any other names in second column also in set
+                               if (names.count(firstCol) != 0) {
                                
                                        wroteSomething = true;
                                        
-                                       out << validSecond[0] << '\t';
-                               
+                                       out << firstCol << '\t';
+                                       
                                        //you know you have at least one valid second since first column is valid
                                        for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
                                        out << validSecond[validSecond.size()-1] << endl;
+                                       
+                               
+                               //make first name in set you come to first column and then add the remaining names to second column
+                               }else {
+                                       //you want part of this row
+                                       if (validSecond.size() != 0) {
+                                       
+                                               wroteSomething = true;
+                                               
+                                               out << validSecond[0] << '\t';
+                                       
+                                               //you know you have at least one valid second since first column is valid
+                                               for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
+                                               out << validSecond[validSecond.size()-1] << endl;
+                                       }
                                }
                        }
-                       
                        gobble(in);
                }
                in.close();
@@ -429,7 +443,7 @@ int GetSeqsCommand::readGroup(){
                        in >> group;                    //read from second column
                        
                        //if this name is in the accnos file
-                       if (names.count(name) == 1) {
+                       if (names.count(name) != 0) {
                                wroteSomething = true;
                                
                                out << name << '\t' << group << endl;
@@ -475,7 +489,7 @@ int GetSeqsCommand::readTax(){
                        in >> tax;                      //read from second column
                        
                        //if this name is in the accnos file
-                       if (names.count(name) == 1) {
+                       if (names.count(name) != 0) {
                                wroteSomething = true;
                                
                                out << name << '\t' << tax << endl;
@@ -530,7 +544,7 @@ int GetSeqsCommand::readAlign(){
                        in >> name;                             //read from first column
                        
                        //if this name is in the accnos file
-                       if (names.count(name) == 1) {
+                       if (names.count(name) != 0) {
                                wroteSomething = true;
                                
                                out << name << '\t';
index 9d8e7960f5c2e949f021602aaf1aa36e7d3e82e0..4f4c809e51e7f9308ae4f372bdd2195eb964f6c3 100644 (file)
@@ -25,7 +25,7 @@ class GetSeqsCommand : public Command {
                set<string> names;
                vector<string> outputNames;
                string accnosfile, fastafile, namefile, groupfile, alignfile, listfile, taxfile, outputDir;
-               bool abort;
+               bool abort, dups;
                
                int readFasta();
                int readName();
index c805edc9896bf15b4e7397305c13d8bfbce175d8..1dd77f9152b241321f6658d0e0efc025c01a1a71 100644 (file)
@@ -165,6 +165,7 @@ int ReadNewickTree::read() {
                if (error != 0) { readOk = error; } 
                
                filehandle.close();
+
                return readOk;
        }
        catch(exception& e) {
@@ -237,14 +238,15 @@ int ReadNewickTree::readTreeString() {
                        // ';' means end of tree.                                                                                               
                        else if((ch=filehandle.peek())==';' || ch=='['){                
                                rooted = 1;                                                                     
-                       }                                                                                               
+                       }       
+               
                        if(rooted != 1){                                                                
                                rc = readNewickInt(filehandle, n, T);
                                if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
                                if(filehandle.peek() == ')'){                                   
                                        readSpecialChar(filehandle,')',"right parenthesis");
                                }                                                                                       
-                       }                                                                                               
+                       }       
                }
                //note: treeclimber had the code below added - not sure why?
                else{
@@ -263,7 +265,8 @@ int ReadNewickTree::readTreeString() {
                        lc = rc = -1;
                } 
                
-               while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}                                            
+               while(((ch=filehandle.get())!=';') && (filehandle.eof() != true)){;}    
+                                                       
                if(rooted != 1){                                                                        
                        T->tree[n].setChildren(lc,rc);
                        T->tree[n].setBranchLength(0);
@@ -271,6 +274,8 @@ int ReadNewickTree::readTreeString() {
                        if(lc!=-1){             T->tree[lc].setParent(n);               }
                        if(rc!=-1){             T->tree[rc].setParent(n);               }
                }
+               
+               //T->printTree(); cout << endl;
                return 0;
        
        }
@@ -287,9 +292,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                if (m->control_pressed) { return -1; } 
                
                int c = readNodeChar(f);
-  string k;
-k = c;
-       cout << "at beginning = " << k <<endl;  
+
                if(c == '('){
                
                        //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
@@ -298,18 +301,15 @@ k = c;
                        while(f.peek() != ')'){
                                int child = readNewickInt(f, n, T);
                                if (child == -1) { return -1; } //reports an error in reading
-                               
+               //cout << "child = " << child << endl;          
                                childrenNodes.push_back(child);
                                
                                //after a child you either have , or ), check for both
                                if(f.peek()==')'){  break;  }
                                else if (f.peek()==',') {   readSpecialChar(f,',',"comma");  }
-                               else { string k;
-                       k = f.peek();
-       cout << "in here k = " << k << '\t' << f.tellg() <<endl;
- }
+                               else {;}
                        }
-       cout << childrenNodes.size() << endl;           
+       //cout << childrenNodes.size() << endl;         
                        if (childrenNodes.size() < 2) {  m->mothurOut("Error in tree, please correct."); m->mothurOutEndLine(); return -1; }
                        
                        //then force into 2 node structure
@@ -317,31 +317,24 @@ k = c;
                        
                                int lc, rc;
                                if (i == 1) { lc = childrenNodes[i-1]; rc = childrenNodes[i]; }
-                               else { lc = n; rc = childrenNodes[i]; }
-                       cout << i << '\t' << lc << '\t' << rc << endl;  
+                               else { lc = n-1; rc = childrenNodes[i]; }
+                       //cout << i << '\t' << lc << '\t' << rc << endl;        
                                T->tree[n].setChildren(lc,rc);
                                T->tree[lc].setParent(n);
                                T->tree[rc].setParent(n);
                                
-                               T->printTree(); cout << endl;
+                               //T->printTree(); cout << endl;
                                n++;
                        }
                        
                        //to account for extra ++ in looping
                        n--;
-                       //int lc = readNewickInt(f, n, T);
-                       //if (lc == -1) { return -1; } //reports an error in reading
-                       
-                       //readSpecialChar(f,',',"comma");
-
-                       //int rc = readNewickInt(f, n, T);
-                       //if (rc == -1) { return -1; }  //reports an error in reading   
                        
                        if(f.peek()==')'){      
                                readSpecialChar(f,')',"right parenthesis");     
                                //to pass over labels in trees
                                c=filehandle.get();
-                               while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
+                               while((c!=',') && (c != -1) && (c!= ':') && (c!=';')&& (c!=')')){ c=filehandle.get(); }
                                filehandle.putback(c);
                        }                       
                
@@ -355,47 +348,6 @@ k = c;
                                T->tree[n].setBranchLength(0.0); 
                        }                                               
                        
-                       //to account for multifurcating trees generated by fasttree, we are forcing them to be bifurcating
-                       /*while(f.peek() == ','){
-                       string k;
-                       k = f.peek();
-       cout << "in here k = " << k << '\t' << f.tellg() <<endl;
-                               //force this node to be left child and read new rc
-                               T->tree[n].setChildren(lc,rc);
-                               T->tree[lc].setParent(n);
-                               T->tree[rc].setParent(n);
-                               
-                               T->printTree(); cout << endl;
-                               lc = n;
-                               n++;
-                               
-                               readSpecialChar(f,',',"comma");
-
-                               rc = readNewickInt(f, n, T);
-               
-                               if (rc == -1) { return -1; }  //reports an error in reading     
-                               
-                               if(f.peek()==')'){      
-                                       readSpecialChar(f,')',"right parenthesis");     
-                                       //to pass over labels in trees
-                                       c=filehandle.get();
-                                       while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
-                                       filehandle.putback(c);
-                                       
-                                       if(f.peek() == ':'){                                                                          
-                                               readSpecialChar(f,':',"colon"); 
-                                       
-                                               if(n >= numNodes){      m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
-                                       
-                                               T->tree[n].setBranchLength(readBranchLength(f));
-                                       }else{
-                                               T->tree[n].setBranchLength(0.0); 
-                                       }                                               
-
-                                       break;
-                               }                       
-                       }*/
-               
                        //T->tree[n].setChildren(lc,rc);
                        //T->tree[lc].setParent(n);
                        //T->tree[rc].setParent(n);
@@ -411,7 +363,7 @@ k = c;
                                name += d;
                                d=f.get();
                        }
-               cout << name << endl;
+//cout << name << endl;
                        int blen = 0;
                        if(d == ':')    {               blen = 1;       }               
                
@@ -455,6 +407,7 @@ k = c;
                        }
                
                        while((c=f.get())!=0 && (c != ':' && c != ',' && c!=')') )              {;}             
+       
                        f.putback(c);
                
                        return n1;
index 1762983a148af80403eb66b68bd2834f703a84d8..8e292af4db27b4bdbbba829fa5280c9723dfb2f7 100644 (file)
@@ -147,7 +147,7 @@ int ReadTreeCommand::execute(){
                                delete globaldata->gTreemap;
                                return 0;
                        }
-                       
+       
                        T[i]->assembleTree();
                }
 
index da6bc44297d474e75d9866c69ff3d790ae92052a..5f5fe6535d6e159850694204efc83253387b9d4c 100644 (file)
@@ -154,7 +154,7 @@ void RemoveSeqsCommand::help(){
                m->mothurOut("The remove.seqs command reads an .accnos file and at least one of the following file types: fasta, name, group, list, taxonomy or alignreport file.\n");
                m->mothurOut("It outputs a file containing the sequences NOT in the .accnos file.\n");
                m->mothurOut("The remove.seqs command parameters are accnos, fasta, name, group, list, taxonomy, alignreport and dups.  You must provide accnos and at least one of the file parameters.\n");
-               m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=false. If dups=true, then remove.seqs outputs a new .accnos file containing all the sequences removed. \n");
+               m->mothurOut("The dups parameter allows you to remove the entire line from a name file if you remove any name from the line. default=false. \n");
                m->mothurOut("The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n");
                m->mothurOut("Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
@@ -178,8 +178,8 @@ int RemoveSeqsCommand::execute(){
                if (m->control_pressed) { return 0; }
                
                //read through the correct file and output lines you want to keep
-               if (fastafile != "")            {               readFasta();    }
                if (namefile != "")                     {               readName();             }
+               if (fastafile != "")            {               readFasta();    }
                if (groupfile != "")            {               readGroup();    }
                if (alignfile != "")            {               readAlign();    }
                if (listfile != "")                     {               readList();             }
@@ -325,12 +325,7 @@ int RemoveSeqsCommand::readName(){
        try {
                if (outputDir == "") {  outputDir += hasPath(namefile);  }
                string outputFileName = outputDir + getRootName(getSimpleName(namefile)) + "pick" + getExtension(namefile);
-               string outputFileName2 = outputDir + getRootName(getSimpleName(namefile)) + "dups.accnos";
 
-               ofstream out2;
-               if (dups) {      openOutputFile(outputFileName2, out2); }
-               bool wroteDups = false;
-               
                ofstream out;
                openOutputFile(outputFileName, out);
 
@@ -341,7 +336,7 @@ int RemoveSeqsCommand::readName(){
                bool wroteSomething = false;
                
                while(!in.eof()){
-                       if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str()); if (dups) { out2.close(); remove(outputFileName2.c_str()); } return 0; }
+                       if (m->control_pressed) { in.close();  out.close();  remove(outputFileName.c_str());  return 0; }
 
                        in >> firstCol;                         
                        in >> secondCol;                        
@@ -365,9 +360,8 @@ int RemoveSeqsCommand::readName(){
                                }
                        }
                        
-                       if ((dups) && (validSecond.size() != parsedNames.size())) { 
-                               wroteDups = true;
-                               for (int i = 0; i < parsedNames.size(); i++) {  out2 << parsedNames[i] << endl;  }
+                       if ((dups) && (validSecond.size() != parsedNames.size())) {  //if dups is true and we want to get rid of anyone, get rid of everyone
+                               for (int i = 0; i < parsedNames.size(); i++) {  names.insert(parsedNames[i]);  }
                        }else {
                                        //if the name in the first column is in the set then print it and any other names in second column also in set
                                if (names.count(firstCol) == 0) {
@@ -401,11 +395,6 @@ int RemoveSeqsCommand::readName(){
                in.close();
                out.close();
                
-               if (dups) { out2.close();  }
-               if (wroteDups == false) {
-                       remove(outputFileName2.c_str()); 
-               }else { outputNames.push_back(outputFileName2); }
-               
                if (wroteSomething == false) {
                        m->mothurOut("Your file contains only sequences from the .accnos file."); m->mothurOutEndLine();
                        remove(outputFileName.c_str()); 
index 400a72ae388a88c5b94aa2b8a717471f600b33ed..8e459810589cbc23e9f443884406028382e4e212 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -736,20 +736,39 @@ int Tree::readTreeString(ifstream& filehandle)    {
 //cout << " at beginning of while " <<  k << endl;                     
                        if(c == ')')  {    
                                //to pass over labels in trees
-                               string label = readLabel(filehandle);
+                               c=filehandle.get();
+                               while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ c=filehandle.get(); }
+                               filehandle.putback(c);
                        }
-                       
                        if(c == ';') { return 0; }
                        if(c == -1) { return 0; }
-                       
                        //if you are a name
                        if((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != '\t') && (c != 32)) { //32 is space
-                               name = readName(filehandle);
+                               name = "";
+                               c = filehandle.get();
+                       //k = c;
+//cout << k << endl;
+                               while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {                       
+                                       name += c;
+                                       c = filehandle.get();
+                       //k = c;
+//cout << " in name while " << k << endl;
+                               }
+                               
+//cout << "name = " << name << endl;
                                globaldata->Treenames.push_back(name);
+                               filehandle.putback(c);
+//k = c;
+//cout << " after putback" <<  k << endl;
                        } 
                        
                        if(c  == ':') { //read until you reach the end of the branch length
-                               string bl = readBranchLength(filehandle);
+                               while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
+                                       c = filehandle.get();
+       //k = c;
+       //cout << " in branch while " << k << endl;
+                               }
+                               filehandle.putback(c);
                        }
                
                        c = filehandle.get();
@@ -770,68 +789,6 @@ int Tree::readTreeString(ifstream& filehandle)     {
 }      
 
 /*******************************************************/
-string Tree::readLabel(ifstream& filehandle)   {
-       try {
-               
-               string label = "";
-               
-               //to pass over labels in trees
-               int c=filehandle.get();
-               while((c!=',') && (c != -1) && (c!= ':') && (c!=';')){ label += c; c=filehandle.get(); }
-               filehandle.putback(c);
-               
-               return label;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Tree", "readLabel");
-               exit(1);
-       }
-}      
-/*******************************************************/
-string Tree::readName(ifstream& filehandle)    {
-       try {
-               
-               string name = "";
-               int c = filehandle.get();
-               
-               while ((c != '(') && (c != ')') && (c != ',') && (c != ':') && (c != '\n') && (c != 32) && (c != '\t')) {                       
-                       name += c;
-                       c = filehandle.get();
-               }
-                               
-//cout << "name = " << name << endl;
-               filehandle.putback(c);
-               
-               return name;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Tree", "readName");
-               exit(1);
-       }
-}      
-/*******************************************************/
-string Tree::readBranchLength(ifstream& filehandle)    {
-       try {
-               
-               string br = "";
-               int c;
-               while ((c != '(') && (c != ')') && (c != ',') && (c != ';') && (c != '\n') && (c != '\t') && (c != 32)) {
-                       br += c;
-                       c = filehandle.get();
-               }
-               filehandle.putback(c);
-               
-               return br;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "Tree", "readBranchLength");
-               exit(1);
-       }
-}      
 
 /*******************************************************/
-/*******************************************************/
 
diff --git a/tree.h b/tree.h
index 99cd68fa7f4b71dfeebffc90ad649a1f01c6564e..603f1c3160f984248e427353e3bc745bd4f5c4b0 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -62,10 +62,7 @@ private:
                                                        //not included in the tree. 
                                                        //only takes names from the first tree in the tree file and assumes that all trees use the same names.
        int readTreeString(ifstream&);
-       string readLabel(ifstream&);
-       string readName(ifstream&);
-       string readBranchLength(ifstream&);
-       
+               
        MothurOut* m;
        
 };