]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraccodecommand.cpp
working on pam
[mothur.git] / chimeraccodecommand.cpp
index bc96e0530993e63cf099381a1ccd8829f9cc7714..d890db4087813cc3c4c4e453b55013fa555e7689 100644 (file)
@@ -9,18 +9,20 @@
 
 #include "chimeraccodecommand.h"
 #include "ccode.h"
+#include "referencedb.h"
 //**********************************************************************************************************************
 vector<string> ChimeraCcodeCommand::setParameters(){   
        try {
-               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
-               CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
-               CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "",false,false); parameters.push_back(pnumwanted);
-               CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-mapinfo-accnos",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
+               CommandParameter pwindow("window", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pwindow);
+               CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pnumwanted);
+               CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -36,7 +38,7 @@ string ChimeraCcodeCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The chimera.ccode command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
-               helpString += "This command was created using the algorythms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
+               helpString += "This command was created using the algorithms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
                helpString += "The chimera.ccode command parameters are fasta, reference, filter, mask, processors, window and numwanted.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
@@ -49,6 +51,7 @@ string ChimeraCcodeCommand::getHelpString(){
                helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
                helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
                helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The chimera.ccode command should be in the following format: \n";
                helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
                helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
@@ -61,6 +64,23 @@ string ChimeraCcodeCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string ChimeraCcodeCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
+        
+        if (type == "chimera") {  pattern = "[filename],[tag],ccode.chimeras-[filename],ccode.chimeras"; } 
+        else if (type == "accnos") {  pattern = "[filename],[tag],ccode.accnos-[filename],ccode.accnos"; } 
+        else if (type == "mapinfo") {  pattern =  "[filename],mapinfo"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ChimeraCcodeCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
 ChimeraCcodeCommand::ChimeraCcodeCommand(){    
        try {
                abort = true; calledHelp = true;
@@ -69,6 +89,7 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(){
                outputTypes["chimera"] = tempOutNames;
                outputTypes["mapinfo"] = tempOutNames;
                outputTypes["accnos"] = tempOutNames;
+        
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
@@ -79,6 +100,7 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(){
 ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -102,6 +124,7 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
                        outputTypes["chimera"] = tempOutNames;
                        outputTypes["mapinfo"] = tempOutNames;
                        outputTypes["accnos"] = tempOutNames;
+            
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -197,10 +220,6 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
-
-                       templatefile = validParameter.validFile(parameters, "reference", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true;  }            
                        
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
@@ -223,13 +242,35 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
-                       convert(temp, window);
+                       m->mothurConvert(temp, window);
                        
                        temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "20"; }
-                       convert(temp, numwanted);
+                       m->mothurConvert(temp, numwanted);
+                       
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templatefile = validParameter.validFile(parameters, "reference", true);
+                       if (templatefile == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templatefile == "not open") { abort = true; } 
+                       else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
+                       
 
                }
        }
@@ -261,23 +302,20 @@ int ChimeraCcodeCommand::execute(){
                        
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
                        string outputFileName, accnosFileName;
-                       if (maskfile != "") {
-                               outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.chimeras";
-                               accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.accnos";
-                       }else {
-                               outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "ccode.chimeras";
-                               accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "ccode.accnos";
-                       }
-
-                       string mapInfo = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "mapinfo";
-                       
-                       if (m->control_pressed) { delete chimera;  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); } outputTypes.clear(); return 0;        }
+            map<string, string> variables; 
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+            string mapInfo = getOutputFileName("mapinfo", variables);
+                       if (maskfile != "") { variables["[tag]"] = maskfile; }
+            outputFileName = getOutputFileName("chimera", variables);
+            accnosFileName = getOutputFileName("accnos", variables);
+                                               
+                       if (m->control_pressed) { delete chimera;  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } outputTypes.clear(); return 0;        }
                        
                #ifdef USE_MPI
                
                                int pid, numSeqsPerProcessor; 
                                int tag = 2001;
-                               vector<unsigned long int> MPIPos;
+                               vector<unsigned long long> MPIPos;
                                                                
                                MPI_Status status; 
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
@@ -303,7 +341,7 @@ int ChimeraCcodeCommand::execute(){
                                MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
                                MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
 
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); } outputTypes.clear();  delete chimera; return 0;  }
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        } outputTypes.clear();  delete chimera; return 0;  }
                        
                                if (pid == 0) { //you are the root process 
                                        string outTemp = "For full window mapping info refer to " + mapInfo + "\n";
@@ -333,7 +371,7 @@ int ChimeraCcodeCommand::execute(){
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
                                        
-                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();  delete chimera; return 0;  }
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } outputTypes.clear();  delete chimera; return 0;  }
 
                                }else{ //you are a child process
                                        MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
@@ -349,7 +387,7 @@ int ChimeraCcodeCommand::execute(){
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
                                        
-                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  outputTypes.clear(); delete chimera; return 0;  }
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  outputTypes.clear(); delete chimera; return 0;  }
                                }
                                
                                //close files 
@@ -368,19 +406,21 @@ int ChimeraCcodeCommand::execute(){
 
                        outHeader.close();
                        
-                       vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
-                               
-                       for (int i = 0; i < (positions.size()-1); i++) {
-                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
-                       }       
+                       
                        
                        //break up file
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
+                       
+                               for (int i = 0; i < (positions.size()-1); i++) {
+                                       lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                               }       
+                       
                                if(processors == 1){
                                                                                
                                        numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                        
-                                       if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
+                                       if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
                                        processIDS.resize(0);
@@ -393,19 +433,19 @@ int ChimeraCcodeCommand::execute(){
                                        //append output files
                                        for(int i=1;i<processors;i++){
                                                m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                               remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
                                        }
                                        
                                        //append output files
                                        for(int i=1;i<processors;i++){
                                                m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
-                                               remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
                                        }
                                        
                                        if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
+                                               m->mothurRemove(outputFileName); 
+                                               m->mothurRemove(accnosFileName);
+                                               for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
                                                for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
                                                delete chimera;
                                                return 0;
@@ -414,15 +454,16 @@ int ChimeraCcodeCommand::execute(){
                                }
 
                        #else
+                               lines.push_back(new linePair(0, 1000));
                                numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
-                               if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
+                               if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
                                
                        #endif
        
                        m->appendFiles(outputFileName, tempHeader);
                
-                       remove(outputFileName.c_str());
+                       m->mothurRemove(outputFileName);
                        rename(tempHeader.c_str(), outputFileName.c_str());
                #endif
                
@@ -499,18 +540,18 @@ int ChimeraCcodeCommand::driver(linePair* filePos, string outputFName, string fi
                        }
                        delete candidateSeq;
                        
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               unsigned long int pos = inFASTA.tellg();
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                               unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
                                if (inFASTA.eof()) { break; }
                        #endif
                        
                        //report progress
-                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
+                       if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");             }
                }
                //report progress
-               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
+               if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n");     }
                
                out.close();
                out2.close();
@@ -525,7 +566,7 @@ int ChimeraCcodeCommand::driver(linePair* filePos, string outputFName, string fi
 }
 //**********************************************************************************************************************
 #ifdef USE_MPI
-int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
+int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
        try {
                                
                MPI_Status status; 
@@ -586,7 +627,7 @@ int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File
 
 int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename, string accnos) {
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 0;
                int num = 0;
                
@@ -626,7 +667,7 @@ int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename,
                        string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
-                       in.close(); remove(tempFile.c_str());
+                       in.close(); m->mothurRemove(tempFile);
                }
                
                return num;