rewrote metastats command in c++, added mothurRemove function to handle ~ error....

[mothur.git] / classifyseqscommand.cpp
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp

index 72ca1d00fed7815ef68a8f1dc321c8c911eaeff0..d9bd698232196c4c3597f8425a8e3f6e8b6b0694 100644 (file)
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -15,6 +15,7 @@
  #include "knn.h"
  
  
+
  //**********************************************************************************************************************
  vector<string> ClassifySeqsCommand::setParameters(){   
         try {
@@ -34,6 +35,7 @@ vector<string> ClassifySeqsCommand::setParameters(){
                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
                 CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
                 CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -63,6 +65,7 @@ string ClassifySeqsCommand::getHelpString(){
  #ifdef USE_MPI
                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
  #endif
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                 helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
                 helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
                 helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
@@ -103,6 +106,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(){
  ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
         try {
                 abort = false; calledHelp = false;   
+               rdb = ReferenceDB::getInstance();
                 
                 //allow user to run help
                 if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -161,16 +165,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                 }
                         }
  
-                       //check for required parameters
-                       templateFileName = validParameter.validFile(parameters, "reference", true);
-                       if (templateFileName == "not found") { 
-                               m->mothurOut("reference is a required parameter for the classify.seqs command."); 
-                               m->mothurOutEndLine();
-                               abort = true; 
-                       }
-                       else if (templateFileName == "not open") { abort = true; }      
-                       
-                                               
                         fastaFileName = validParameter.validFile(parameters, "fasta", false);
                         if (fastaFileName == "not found") {                             
                                 //if there is a current fasta file, use it
@@ -250,16 +244,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
                         }
  
-                       
-                       taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxonomyFileName == "not found") { 
-                               m->mothurOut("taxonomy is a required parameter for the classify.seqs command."); 
-                               m->mothurOutEndLine();
-                               abort = true; 
-                       }
-                       else if (taxonomyFileName == "not open") { abort = true; }      
-                       
-                       
                         namefile = validParameter.validFile(parameters, "name", false);
                         if (namefile == "not found") { namefile = "";  }
  
@@ -397,6 +381,41 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                         temp = validParameter.validFile(parameters, "ksize", false);            if (temp == "not found"){       temp = "8";                             }
                         convert(temp, kmerSize); 
                         
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templateFileName = validParameter.validFile(parameters, "reference", true);
+                       if (templateFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templateFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templateFileName == "not open") { abort = true; }     
+                       else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no taxonomy we abort
+                       taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxonomyFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->wordGenusProb.size() != 0) {
+                                       taxonomyFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (taxonomyFileName == "not open") { abort = true; }     
+                       else {  if (save) {     rdb->setSavedTaxonomy(taxonomyFileName);        }       }
+                       
                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                         m->setProcessors(temp);
                         convert(temp, processors); 
@@ -471,7 +490,10 @@ int ClassifySeqsCommand::execute(){
                 
                         m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
                         
-                       string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFileName));
+                       string baseTName = taxonomyFileName;
+                       if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
+                       
+                       string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
                         RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
                         if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
                         RippedTaxName +=  "."; 
@@ -543,7 +565,7 @@ int ClassifySeqsCommand::execute(){
                                         //align your part
                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
                                         
-                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); } delete classify; return 0;  }
+                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
                                         
                                         for (int i = 1; i < processors; i++) {
                                                 int done;
@@ -636,9 +658,9 @@ int ClassifySeqsCommand::execute(){
                         string group = "";
                         if (groupfile != "") {  group = groupfileNames[s]; }
                         
-                       PhyloSummary taxaSum(taxonomyFileName, group);
+                       PhyloSummary taxaSum(baseTName, group);
                         
-                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } delete classify; return 0; }
+                       if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
                 
                         if (namefile == "") {  taxaSum.summarize(tempTaxonomyFile);  }
                         else {
@@ -665,9 +687,9 @@ int ClassifySeqsCommand::execute(){
                                 }
                                 in.close();
                         }
-                       remove(tempTaxonomyFile.c_str());
+                       m->mothurRemove(tempTaxonomyFile);
                         
-                       if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } delete classify; return 0; }
+                       if (m->control_pressed) {  outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } delete classify; return 0; }
                         
                         //print summary file
                         ofstream outTaxTree;
@@ -689,7 +711,7 @@ int ClassifySeqsCommand::execute(){
                         //read taxfile - this reading and rewriting is done to preserve the confidence scores.
                         string name, taxon;
                         while (!inTax.eof()) {
-                               if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } remove(unclass.c_str()); delete classify; return 0; }
+                               if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);        } m->mothurRemove(unclass); delete classify; return 0; }
  
                                 inTax >> name >> taxon; m->gobble(inTax);
                                 
@@ -700,7 +722,7 @@ int ClassifySeqsCommand::execute(){
                         inTax.close();  
                         outTax.close();
                         
-                       remove(newTaxonomyFile.c_str());
+                       m->mothurRemove(newTaxonomyFile);
                         rename(unclass.c_str(), newTaxonomyFile.c_str());
                         
                         m->mothurOutEndLine();
@@ -778,7 +800,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                                 process++;
                         }else if (pid == 0){
                                 num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
-                               
+
                                 //pass numSeqs to parent
                                 ofstream out;
                                 string tempFile = filename + toString(getpid()) + ".num.temp";
@@ -808,14 +830,14 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
                         m->openInputFile(tempFile, in);
                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
-                       in.close(); remove(tempFile.c_str());
+                       in.close(); m->mothurRemove(m->getFullPathName(tempFile));
                 }
                 
                 for(int i=0;i<processIDS.size();i++){
                         appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
                         appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
-                       remove((taxFileName + toString(processIDS[i]) + ".temp").c_str());
-                       remove((tempTaxFile + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
+                       m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
                 }
                 
                 return num;