]> git.donarmstrong.com Git - mothur.git/commitdiff
added remove.seqs command
author: westcott <westcott>
Wed, 8 Jul 2009 15:40:10 +0000 (15:40 +0000)
committer: westcott <westcott>
Wed, 8 Jul 2009 15:40:10 +0000 (15:40 +0000)
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
getseqscommand.cpp
getseqscommand.h
removeseqscommand.cpp [new file with mode: 0644]
removeseqscommand.h [new file with mode: 0644]

index af71efc71121f49a68abc399ab55c416e09938ad..dfe49ee66a679a32f986a14a5c17307ee90932c6 100644 (file)
@@ -78,6 +78,7 @@
                37B28F680F27590100808A62 /* deconvolutecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B28F670F27590100808A62 /* deconvolutecommand.cpp */; };
                37B73C761004BEFD008C4B41 /* listseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B73C751004BEFD008C4B41 /* listseqscommand.cpp */; };
                37B73CA61004D89A008C4B41 /* getseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B73CA51004D89A008C4B41 /* getseqscommand.cpp */; };
+               37B73CC01004EB38008C4B41 /* removeseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37B73CBF1004EB38008C4B41 /* removeseqscommand.cpp */; };
                37C1D9730F86506E0059E3F0 /* binsequencecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */; };
                37C753CE0FB3415200DBD02E /* distancecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37C753CD0FB3415200DBD02E /* distancecommand.cpp */; };
                37D928550F21331F001D4494 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 37D927B80F21331F001D4494 /* ace.cpp */; };
                37B73C751004BEFD008C4B41 /* listseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = listseqscommand.cpp; sourceTree = "<group>"; };
                37B73CA41004D89A008C4B41 /* getseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getseqscommand.h; sourceTree = "<group>"; };
                37B73CA51004D89A008C4B41 /* getseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getseqscommand.cpp; sourceTree = "<group>"; };
+               37B73CBE1004EB38008C4B41 /* removeseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeseqscommand.h; sourceTree = "<group>"; };
+               37B73CBF1004EB38008C4B41 /* removeseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeseqscommand.cpp; sourceTree = "<group>"; };
                37C1D9710F86506E0059E3F0 /* binsequencecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = binsequencecommand.h; sourceTree = SOURCE_ROOT; };
                37C1D9720F86506E0059E3F0 /* binsequencecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = binsequencecommand.cpp; sourceTree = SOURCE_ROOT; };
                37C753CC0FB3415200DBD02E /* distancecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = distancecommand.h; sourceTree = SOURCE_ROOT; };
                                372E126F0F26365B0095CF7E /* readotucommand.cpp */,
                                37E5F4900F2A3DA800F8D827 /* readtreecommand.h */,
                                37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */,
+                               37B73CBE1004EB38008C4B41 /* removeseqscommand.h */,
+                               37B73CBF1004EB38008C4B41 /* removeseqscommand.cpp */,
                                7E09C5120FDA79C5002ECAE5 /* reversecommand.h */,
                                7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */,
                                371B30B30FD7EE67000414CA /* screenseqscommand.h */,
                                379D3D510FF90E090068C1C0 /* chimeraseqscommand.cpp in Sources */,
                                37B73C761004BEFD008C4B41 /* listseqscommand.cpp in Sources */,
                                37B73CA61004D89A008C4B41 /* getseqscommand.cpp in Sources */,
+                               37B73CC01004EB38008C4B41 /* removeseqscommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
index 2783aa3acfe56a38d2479af5696e542f2228693b..c91dbf9abd520fc908698f68756d726d25002e95 100644 (file)
@@ -52,6 +52,7 @@
 #include "chimeraseqscommand.h"
 #include "listseqscommand.h"
 #include "getseqscommand.h"
+#include "removeseqscommand.h"
 
 /***********************************************************/
 
@@ -102,6 +103,7 @@ CommandFactory::CommandFactory(){
        commands["chimera.seqs"]                = "chimera.seqs";
        commands["list.seqs"]                   = "list.seqs";
        commands["get.seqs"]                    = "get.seqs";
+       commands["remove.seqs"]                 = "remove.seqs";
        commands["quit"]                                = "quit"; 
 
 }
@@ -161,6 +163,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "chimera.seqs")                  {       command = new ChimeraSeqsCommand(optionString);                 }
                else if(commandName == "list.seqs")                             {       command = new ListSeqsCommand(optionString);                    }
                else if(commandName == "get.seqs")                              {       command = new GetSeqsCommand(optionString);                             }
+               else if(commandName == "remove.seqs")                   {       command = new RemoveSeqsCommand(optionString);                  }
                else if(commandName == "merge.files")                   {       command = new MergeFileCommand(optionString);                   }
                else                                                                                    {       command = new NoCommand(optionString);                                  }
 
index e58cc883b26a33d2dec9526bb52bcbfb7de7741e..ae242b6f56946c544aff5f389b026c2c93a50bee 100644 (file)
@@ -119,19 +119,30 @@ void GetSeqsCommand::readFasta(){
                openInputFile(fastafile, in);
                string name;
                
+               bool wroteSomething = false;
+               
                while(!in.eof()){
                        Sequence currSeq(in);
                        name = currSeq.getName();
                        
                        //if this name is in the accnos file
                        if (names.count(name) == 1) {
+                               wroteSomething = true;
+                               
                                currSeq.printSequence(out);
+                               
+                               names.erase(name);
                        }
                        
                        gobble(in);
                }
                in.close();     
-               out.close();    
+               out.close();
+               
+               if (wroteSomething == false) {
+                       mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
+                       remove(outputFileName.c_str()); 
+               }
 
        }
        catch(exception& e) {
@@ -152,6 +163,8 @@ void GetSeqsCommand::readName(){
                openInputFile(namefile, in);
                string name, firstCol, secondCol;
                
+               bool wroteSomething = false;
+               
                
                while(!in.eof()){
 
@@ -173,22 +186,31 @@ void GetSeqsCommand::readName(){
                        for (int i = 0; i < parsedNames.size(); i++) {
                                if (names.count(parsedNames[i]) == 1) {
                                        validSecond.push_back(parsedNames[i]);
+                                       names.erase(parsedNames[i]);
                                }
                        }
 
                        
                        //if the name in the first column is in the set then print it and any other names in second column also in set
                        if (names.count(firstCol) == 1) {
+                       
+                               wroteSomething = true;
+                               
                                out << firstCol << '\t';
                                
                                //you know you have at least one valid second since first column is valid
                                for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
                                out << validSecond[validSecond.size()-1] << endl;
+                               
+                               names.erase(firstCol);
                        
                        //make first name in set you come to first column and then add the remaining names to second column
                        }else {
                                //you want part of this row
                                if (validSecond.size() != 0) {
+                               
+                                       wroteSomething = true;
+                                       
                                        out << validSecond[0] << '\t';
                                
                                        //you know you have at least one valid second since first column is valid
@@ -202,6 +224,11 @@ void GetSeqsCommand::readName(){
                in.close();
                out.close();
                
+               if (wroteSomething == false) {
+                       mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
+                       remove(outputFileName.c_str()); 
+               }
+               
        }
        catch(exception& e) {
                errorOut(e, "GetSeqsCommand", "readName");
@@ -221,6 +248,8 @@ void GetSeqsCommand::readGroup(){
                openInputFile(groupfile, in);
                string name, group;
                
+               bool wroteSomething = false;
+               
                while(!in.eof()){
 
                        in >> name;                             //read from first column
@@ -228,13 +257,22 @@ void GetSeqsCommand::readGroup(){
                        
                        //if this name is in the accnos file
                        if (names.count(name) == 1) {
+                               wroteSomething = true;
+                               
                                out << name << '\t' << group << endl;
+                               
+                               names.erase(name);
                        }
                                        
                        gobble(in);
                }
                in.close();
                out.close();
+               
+               if (wroteSomething == false) {
+                       mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
+                       remove(outputFileName.c_str()); 
+               }
 
        }
        catch(exception& e) {
@@ -255,6 +293,8 @@ void GetSeqsCommand::readAlign(){
                openInputFile(alignfile, in);
                string name, junk;
                
+               bool wroteSomething = false;
+               
                //read column headers
                for (int i = 0; i < 16; i++) {  
                        if (!in.eof())  {       in >> junk;      out << junk << '\t';   }
@@ -268,7 +308,8 @@ void GetSeqsCommand::readAlign(){
                        
                        //if this name is in the accnos file
                        if (names.count(name) == 1) {
-
+                               wroteSomething = true;
+                               
                                out << name << '\t';
                                
                                //read rest
@@ -278,6 +319,8 @@ void GetSeqsCommand::readAlign(){
                                }
                                out << endl;
                                
+                               names.erase(name);
+                               
                        }else {//still read just don't do anything with it
                                //read rest
                                for (int i = 0; i < 15; i++) {  
@@ -290,7 +333,11 @@ void GetSeqsCommand::readAlign(){
                }
                in.close();
                out.close();
-
+               
+               if (wroteSomething == false) {
+                       mothurOut("Your file does not contain any sequence from the .accnos file."); mothurOutEndLine();
+                       remove(outputFileName.c_str()); 
+               }
                
        }
        catch(exception& e) {
index 47f56f75c52f6ec3bf11128fa06ea14295571649..2bd0754b93715a347e517d7b11be3bf8f478e944 100644 (file)
@@ -6,7 +6,7 @@
  *  Mothur
  *
  *  Created by Sarah Westcott on 7/8/09.
- *  Copyright 2009 __MyCompanyName__. All rights reserved.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
  *
  */
  
diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp
new file mode 100644 (file)
index 0000000..d3d12cc
--- /dev/null
@@ -0,0 +1,367 @@
+/*
+ *  removeseqscommand.cpp
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/8/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "removeseqscommand.h"
+#include "sequence.hpp"
+
+//**********************************************************************************************************************
+
+// Builds a remove.seqs command from its option string.
+// "help" prints the usage text and flags the command as aborted. Any other string is parsed into
+// parameters; each one is validated and the accnos, fasta, name, group and align file names are stored.
+// abort is set when a parameter is rejected, when a file comes back as "not open", when accnos is
+// missing, when none of fasta/name/group/align is given, or when more than two parameters were supplied.
+RemoveSeqsCommand::RemoveSeqsCommand(string option){
+       try {
+               abort = false;
+
+               //help request: print the usage text and stop here
+               if (option == "help") { help(); abort = true; return; }
+
+               //parameter labels accepted by this command
+               string allowed[] =  {"fasta","name", "group", "align", "accnos" };
+               vector<string> allowedParams (allowed, allowed+(sizeof(allowed)/sizeof(allowed[0])));
+
+               OptionParser optionParser(option);
+               map<string,string> parameters = optionParser.getParameters();
+
+               ValidParameters validator;
+
+               //every supplied parameter has to pass validation
+               map<string,string>::iterator param = parameters.begin();
+               while (param != parameters.end()) {
+                       if (validator.isValidParameter(param->first, allowedParams, param->second) != true) {  abort = true;  }
+                       param++;
+               }
+
+               //accnos is mandatory
+               accnosfile = validator.validFile(parameters, "accnos", true);
+               if (accnosfile == "not found") {
+                       accnosfile = "";
+                       mothurOut("You must provide an accnos file."); mothurOutEndLine();
+                       abort = true;
+               }else if (accnosfile == "not open") { abort = true; }
+
+               //the remaining files are optional individually; "not found" just means the parameter was not given
+               fastafile = validator.validFile(parameters, "fasta", true);
+               if (fastafile == "not found")           {  fastafile = "";  }
+               else if (fastafile == "not open")       {  abort = true;  }
+
+               namefile = validator.validFile(parameters, "name", true);
+               if (namefile == "not found")            {  namefile = "";  }
+               else if (namefile == "not open")        {  abort = true;  }
+
+               groupfile = validator.validFile(parameters, "group", true);
+               if (groupfile == "not found")           {  groupfile = "";  }
+               else if (groupfile == "not open")       {  abort = true;  }
+
+               alignfile = validator.validFile(parameters, "align", true);
+               if (alignfile == "not found")           {  alignfile = "";  }
+               else if (alignfile == "not open")       {  abort = true;  }
+
+               //at least one file to filter is needed
+               if (fastafile.empty() && namefile.empty() && groupfile.empty() && alignfile.empty()) {
+                       mothurOut("You must provide one of the following: fasta, name, group, align."); mothurOutEndLine();
+                       abort = true;
+               }
+
+               //accnos plus one input file is the most that may be supplied
+               if (parameters.size() > 2) {
+                       mothurOut("You may only enter one of the following: fasta, name, group, align."); mothurOutEndLine();
+                       abort = true;
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "RemoveSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+// Prints the usage text of the remove.seqs command through mothurOut.
+void RemoveSeqsCommand::help(){
+       try {
+               //usage text, printed entry by entry in this order
+               const char* usage[] = {
+                       "The remove.seqs command reads an .accnos file and one of the following file types: fasta, name, group or alignreport file.\n",
+                       "It outputs a file containing the sequences NOT in the .accnos file.\n",
+                       "The remove.seqs command parameters are accnos, fasta, name, group and align.  You must provide accnos and one of the other parameters.\n",
+                       "The remove.seqs command should be in the following format: remove.seqs(accnos=yourAccnos, fasta=yourFasta).\n",
+                       "Example remove.seqs(accnos=amazon.accnos, fasta=amazon.fasta).\n",
+                       "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"
+               };
+               const int numEntries = sizeof(usage)/sizeof(usage[0]);
+
+               for (int i = 0; i < numEntries; i++) {  mothurOut(usage[i]);  }
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+// Runs the command: loads the names listed in the accnos file, then filters the first input file that
+// was supplied (checked in the order fasta, name, group, align).
+// Returns 0 in every non-exceptional case, including when the constructor flagged an abort.
+int RemoveSeqsCommand::execute(){
+       try {
+
+               if (!abort) {
+                       //load the names that are to be removed
+                       readAccnos();
+
+                       //copy the chosen file, leaving out the entries named in the accnos file
+                       if (!fastafile.empty())         {       readFasta();    }
+                       else if (!namefile.empty())     {       readName();     }
+                       else if (!groupfile.empty())    {       readGroup();    }
+                       else if (!alignfile.empty())    {       readAlign();    }
+               }
+
+               return 0;
+       }
+
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "execute");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Copies every sequence of the fasta file whose name is NOT in names to the file
+// getRootName(fastafile) + "pick". A name that matches is erased from names when its sequence is skipped.
+// If no sequence is copied, a message is printed and the output file is deleted.
+void RemoveSeqsCommand::readFasta(){
+       try {
+               string pickFileName = getRootName(fastafile) + "pick";
+               ofstream pickOut;
+               openOutputFile(pickFileName, pickOut);
+
+               ifstream fastaIn;
+               openInputFile(fastafile, fastaIn);
+
+               bool keptAny = false;
+
+               while (!fastaIn.eof()) {
+                       Sequence seq(fastaIn);
+                       set<string>::iterator hit = names.find(seq.getName());
+
+                       if (hit != names.end()) {
+                               //listed in the accnos file: leave this sequence out
+                               names.erase(hit);
+                       }else {
+                               keptAny = true;
+                               seq.printSequence(pickOut);
+                       }
+
+                       gobble(fastaIn);
+               }
+               fastaIn.close();
+               pickOut.close();
+
+               //nothing survived the filter, so do not leave an empty output file behind
+               if (!keptAny) {
+                       mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
+                       remove(pickFileName.c_str());
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "readFasta");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Filters a names file (two whitespace separated columns: a name, then a comma separated list of names)
+// into getRootName(namefile) + "pick", dropping every name that is listed in names.
+//  - first column not listed: the row is written with the first column unchanged and the surviving
+//    second column names.
+//  - first column listed: the first surviving second column name takes its place (and still appears in
+//    the list); the row is dropped when no name survives.
+// Matched names are erased from names as they are met. If no row is written, a message is printed and the
+// output file is deleted.
+void RemoveSeqsCommand::readName(){
+       try {
+
+               string outputFileName = getRootName(namefile) + "pick";
+               ofstream out;
+               openOutputFile(outputFileName, out);
+
+               ifstream in;
+               openInputFile(namefile, in);
+               string firstCol, secondCol;
+
+               bool wroteSomething = false;
+
+               while(!in.eof()){
+
+                       in >> firstCol;
+                       in >> secondCol;
+
+                       //decide the fate of the first column BEFORE scanning the second column: the scan erases
+                       //matched names from the set, so testing afterwards would treat a first column that is
+                       //also in its own list as "not listed" and wrongly keep it
+                       const bool keepFirst = (names.count(firstCol) == 0);
+
+                       //split the second column on ',' and keep only the names that are not being removed
+                       vector<string> validSecond;
+                       string::size_type start = 0;
+                       while (true) {
+                               const string::size_type comma = secondCol.find(',', start);
+                               const string name = (comma == string::npos) ? secondCol.substr(start) : secondCol.substr(start, comma - start);
+
+                               if (names.count(name) == 0)     {  validSecond.push_back(name);  }
+                               else                            {  names.erase(name);            }
+
+                               if (comma == string::npos) { break; }
+                               start = comma + 1;
+                       }
+
+                       //comma separated list of the surviving names
+                       string keptList;
+                       for (vector<string>::size_type i = 0; i < validSecond.size(); i++) {
+                               if (i != 0) { keptList += ','; }
+                               keptList += validSecond[i];
+                       }
+
+                       if (keepFirst) {
+
+                               wroteSomething = true;
+
+                               //a kept first column is expected to be in its own list (TODO confirm against the names
+                               //file format); if the list came out empty anyway, write the name alone instead of
+                               //indexing an empty vector
+                               out << firstCol << '\t' << (validSecond.empty() ? firstCol : keptList) << endl;
+
+                       }else {
+                               names.erase(firstCol);
+
+                               //part of the row survives: promote the first surviving name to the first column
+                               if (!validSecond.empty()) {
+
+                                       wroteSomething = true;
+
+                                       out << validSecond[0] << '\t' << keptList << endl;
+                               }
+                       }
+
+                       gobble(in);
+               }
+               in.close();
+               out.close();
+
+               //nothing survived the filter, so do not leave an empty output file behind
+               if (wroteSomething == false) {
+                       mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
+                       remove(outputFileName.c_str());
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "readName");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Copies every row of the group file (name, then group) whose name is NOT in names to the file
+// getRootName(groupfile) + "pick". A name that matches is erased from names when its row is skipped.
+// If no row is copied, a message is printed and the output file is deleted.
+void RemoveSeqsCommand::readGroup(){
+       try {
+
+               string pickFileName = getRootName(groupfile) + "pick";
+               ofstream pickOut;
+               openOutputFile(pickFileName, pickOut);
+
+               ifstream groupIn;
+               openInputFile(groupfile, groupIn);
+               string seqName, groupName;
+
+               bool keptAny = false;
+
+               while (!groupIn.eof()) {
+
+                       //first column is the sequence name, second column its group
+                       groupIn >> seqName >> groupName;
+
+                       set<string>::iterator hit = names.find(seqName);
+
+                       if (hit != names.end()) {
+                               //listed in the accnos file: leave this row out
+                               names.erase(hit);
+                       }else {
+                               keptAny = true;
+                               pickOut << seqName << '\t' << groupName << endl;
+                       }
+
+                       gobble(groupIn);
+               }
+               groupIn.close();
+               pickOut.close();
+
+               //nothing survived the filter, so do not leave an empty output file behind
+               if (!keptAny) {
+                       mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
+                       remove(pickFileName.c_str());
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "readGroup");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+// Filters an alignreport file into getRootName(alignfile) + "pick".
+// The file starts with a line of 16 column headers, which is always copied; every following row has 16
+// columns, the first of which is the sequence name. Rows whose name is NOT in names are copied (each
+// column followed by a tab); rows whose name is in names are read and discarded, and the name is erased
+// from names. If no data row is copied, a message is printed and the output file is deleted.
+void RemoveSeqsCommand::readAlign(){
+       try {
+               string pickFileName = getRootName(alignfile) + "pick";
+               ofstream pickOut;
+               openOutputFile(pickFileName, pickOut);
+
+               ifstream reportIn;
+               openInputFile(alignfile, reportIn);
+               string seqName, field;
+
+               bool keptAny = false;
+
+               //copy the column headers
+               for (int col = 0; col < 16 && !reportIn.eof(); col++) {
+                       reportIn >> field;
+                       pickOut << field << '\t';
+               }
+               pickOut << endl;
+
+               while (!reportIn.eof()) {
+
+                       reportIn >> seqName;                    //first column holds the name
+
+                       //a row is kept only when its name is absent from the accnos names
+                       const bool keepRow = (names.count(seqName) == 0);
+
+                       if (keepRow) {
+                               keptAny = true;
+                               pickOut << seqName << '\t';
+                       }else {
+                               names.erase(seqName);
+                       }
+
+                       //the other 15 columns are consumed either way and only echoed for a kept row
+                       for (int col = 0; col < 15 && !reportIn.eof(); col++) {
+                               reportIn >> field;
+                               if (keepRow) {  pickOut << field << '\t';  }
+                       }
+                       if (keepRow) {  pickOut << endl;  }
+
+                       gobble(reportIn);
+               }
+               reportIn.close();
+               pickOut.close();
+
+               //nothing survived the filter, so do not leave a header-only output file behind
+               if (!keptAny) {
+                       mothurOut("Your file contains only sequences from the .accnos file."); mothurOutEndLine();
+                       remove(pickFileName.c_str());
+               }
+
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "readAlign");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Loads the whitespace separated entries of the accnos file into names.
+// NOTE(review): the string is inserted after every extraction attempt, whether or not the extraction
+// succeeded, so an empty file would add an empty name - confirm whether that matters to callers.
+void RemoveSeqsCommand::readAccnos(){
+       try {
+
+               ifstream accnosIn;
+               openInputFile(accnosfile, accnosIn);
+
+               string entry;
+               while (!accnosIn.eof()) {
+                       accnosIn >> entry;
+                       names.insert(entry);
+
+                       gobble(accnosIn);
+               }
+
+               accnosIn.close();
+
+       }
+       catch(exception& e) {
+               errorOut(e, "RemoveSeqsCommand", "readAccnos");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+
diff --git a/removeseqscommand.h b/removeseqscommand.h
new file mode 100644 (file)
index 0000000..434983e
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef REMOVESEQSCOMMAND_H
+#define REMOVESEQSCOMMAND_H
+
+/*
+ *  removeseqscommand.h
+ *  Mothur
+ *
+ *  Created by Sarah Westcott on 7/8/09.
+ *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+#include "command.hpp"
+
+// Command object behind remove.seqs: reads the names listed in an accnos file and writes a copy of one
+// fasta, name, group or alignreport file without the entries that carry those names.
+class RemoveSeqsCommand : public Command {
+
+       public:
+               RemoveSeqsCommand(string option);       //parses and validates the option string
+               ~RemoveSeqsCommand() {}
+
+               int execute();                          //runs the filter unless the command was aborted
+               void help();                            //prints the usage text
+
+       private:
+               set<string> names;                      //names read from the accnos file
+               string accnosfile;                      //file names taken from the parameters; "" when not given
+               string fastafile;
+               string namefile;
+               string groupfile;
+               string alignfile;
+               bool abort;                             //set by the constructor when the command must not run
+
+               void readAccnos();                      //fills names
+               void readFasta();                       //one filter per supported input file type
+               void readName();
+               void readGroup();
+               void readAlign();
+
+};
+
+#endif
+