]> git.donarmstrong.com Git - mothur.git/blobdiff - chimerapintailcommand.cpp
optimizing classify.seqs calculating of template probabilities.
[mothur.git] / chimerapintailcommand.cpp
index b11887d8cdddc110490e436777422dc0b0d86105..c5c678e78a344eed06ea0308c0df6178e7f22a51 100644 (file)
@@ -10,6 +10,7 @@
 #include "chimerapintailcommand.h"
 #include "pintail.h"
 
+
 //**********************************************************************************************************************
 vector<string> ChimeraPintailCommand::setParameters(){ 
        try {
@@ -24,7 +25,8 @@ vector<string> ChimeraPintailCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -50,6 +52,7 @@ string ChimeraPintailCommand::getHelpString(){
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
 #endif
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
                helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
                helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
@@ -66,6 +69,27 @@ string ChimeraPintailCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string ChimeraPintailCommand::getOutputFileNameTag(string type, string inputName=""){  
+       try {
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "chimera") {  outputFileName =  "pintail.chimeras"; }
+            else if (type == "accnos") {  outputFileName =  "pintail.accnos"; }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraPintailCommand", "getOutputFileNameTag");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 ChimeraPintailCommand::ChimeraPintailCommand(){        
        try {
                abort = true; calledHelp = true;
@@ -83,9 +107,11 @@ ChimeraPintailCommand::ChimeraPintailCommand(){
 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
@@ -204,6 +230,8 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                                                        //erase from file list
                                                        fastaFileNames.erase(fastaFileNames.begin()+i);
                                                        i--;
+                                               }else {
+                                                       m->setFastaFile(fastaFileNames[i]);
                                                }
                                        }
                                }
@@ -218,13 +246,35 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
-                       convert(temp, window);
+                       m->mothurConvert(temp, window);
                        
                        temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
-                       convert(temp, increment);
+                       m->mothurConvert(temp, increment);
+                       
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templatefile = validParameter.validFile(parameters, "reference", true);
+                       if (templatefile == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templatefile == "not open") { abort = true; } 
+                       else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
+                       
                        
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
@@ -271,10 +321,6 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                
-                       templatefile = validParameter.validFile(parameters, "reference", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
-                       
                        consfile = validParameter.validFile(parameters, "conservation", true);
                        if (consfile == "not open") { abort = true; }
                        else if (consfile == "not found") { 
@@ -326,15 +372,18 @@ int ChimeraPintailCommand::execute(){
                        if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
                        
                        //check for quantile to save the time
+                       string baseName = templatefile;
+                       if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
+                       
                        string tempQuan = "";
                        if ((!filter) && (maskfile == "")) {
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
                        }else if ((!filter) && (maskfile != "")) { 
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
                        }else if ((filter) && (maskfile != "")) { 
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
                        }else if ((filter) && (maskfile == "")) { 
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
                        }
                        
                        ifstream FileTest(tempQuan.c_str());
@@ -347,13 +396,13 @@ int ChimeraPintailCommand::execute(){
                                string tryPath = m->getDefaultPath();
                                string tempQuan = "";
                                if ((!filter) && (maskfile == "")) {
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
                                }else if ((!filter) && (maskfile != "")) { 
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
                                }else if ((filter) && (maskfile != "")) { 
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
                                }else if ((filter) && (maskfile == "")) { 
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
                                }
                                
                                ifstream FileTest2(tempQuan.c_str());
@@ -364,20 +413,19 @@ int ChimeraPintailCommand::execute(){
                                        }
                                }
                        }
-                       
                        chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
                        
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
                        string outputFileName, accnosFileName;
                        if (maskfile != "") {
-                               outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
-                               accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
+                               outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("chimera");
+                               accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("accnos");
                        }else {
-                               outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
-                               accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
+                               outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
+                               accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
                        }
                        
-                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
+                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
                        
                        if (chimera->getUnaligned()) { 
                                m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
@@ -389,7 +437,7 @@ int ChimeraPintailCommand::execute(){
                #ifdef USE_MPI
                        int pid, numSeqsPerProcessor; 
                                int tag = 2001;
-                               vector<unsigned long int> MPIPos;
+                               vector<unsigned long long> MPIPos;
                                
                                MPI_Status status; 
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
@@ -415,7 +463,7 @@ int ChimeraPintailCommand::execute(){
                                MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
                                MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                                
-                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
 
                                if (pid == 0) { //you are the root process 
                                                                
@@ -435,7 +483,7 @@ int ChimeraPintailCommand::execute(){
                                        //do your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
                                        
                                }else{ //you are a child process
                                        MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
@@ -450,7 +498,7 @@ int ChimeraPintailCommand::execute(){
                                        //do your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {      m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
                                }
                                
                                //close files 
@@ -459,19 +507,20 @@ int ChimeraPintailCommand::execute(){
                                MPI_File_close(&outMPIAccnos);
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
-                       vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
-                               
-                       for (int i = 0; i < (positions.size()-1); i++) {
-                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
-                       }       
-                       
+                                               
                        //break up file
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
+                       
+                               for (int i = 0; i < (positions.size()-1); i++) {
+                                       lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                               }       
+                       
                                if(processors == 1){
                
                                        numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                        
-                                       if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
                                        processIDS.resize(0);
@@ -484,19 +533,19 @@ int ChimeraPintailCommand::execute(){
                                        //append output files
                                        for(int i=1;i<processors;i++){
                                                m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                               remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
                                        }
                                        
                                        //append output files
                                        for(int i=1;i<processors;i++){
                                                m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
-                                               remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
                                        }
                                                                                
                                        if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
+                                               m->mothurRemove(outputFileName); 
+                                               m->mothurRemove(accnosFileName);
+                                               for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
                                                for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                                                delete chimera;
                                                return 0;
@@ -504,9 +553,10 @@ int ChimeraPintailCommand::execute(){
                                }
 
                        #else
+                               lines.push_back(new linePair(0, 1000));
                                numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
-                               if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                               if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                        #endif
                        
                #endif  
@@ -582,8 +632,8 @@ int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               unsigned long int pos = inFASTA.tellg();
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
                                if (inFASTA.eof()) { break; }
@@ -608,7 +658,7 @@ int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string
 }
 //**********************************************************************************************************************
 #ifdef USE_MPI
-int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
+int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
        try {
                                
                MPI_Status status; 
@@ -668,7 +718,7 @@ int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fi
 
 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 0;
                int num = 0;
                
@@ -708,7 +758,7 @@ int ChimeraPintailCommand::createProcesses(string outputFileName, string filenam
                        string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
-                       in.close(); remove(tempFile.c_str());
+                       in.close(); m->mothurRemove(tempFile);
                }
                
                return num;