]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayercommand.cpp
added blastlocation to chimera.slayer and fixed issue with clearcut getting current...
[mothur.git] / chimeraslayercommand.cpp
index 636e17536a024f04476fc04ce431bb6524e29839..7c35453a8993363512c9121aed83cd664ebcb799 100644 (file)
@@ -10,6 +10,7 @@
 #include "chimeraslayercommand.h"
 #include "chimeraslayer.h"
 #include "deconvolutecommand.h"
+#include "referencedb.h"
 
 //**********************************************************************************************************************
 vector<string> ChimeraSlayerCommand::setParameters(){  
@@ -35,9 +36,11 @@ vector<string> ChimeraSlayerCommand::setParameters(){
                CommandParameter pdivergence("divergence", "Number", "", "1.007", "", "", "",false,false); parameters.push_back(pdivergence);
                CommandParameter pparents("parents", "Number", "", "3", "", "", "",false,false); parameters.push_back(pparents);
                CommandParameter pincrement("increment", "Number", "", "5", "", "", "",false,false); parameters.push_back(pincrement);
+               CommandParameter pblastlocation("blastlocation", "String", "", "", "", "", "",false,false); parameters.push_back(pblastlocation);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -53,9 +56,9 @@ string ChimeraSlayerCommand::getHelpString(){
                string helpString = "";
                helpString += "The chimera.slayer command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
-               helpString += "The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, and realign.\n";
+               helpString += "The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, blastlocation and realign.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
-               helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
+               helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
@@ -79,6 +82,8 @@ string ChimeraSlayerCommand::getHelpString(){
                helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n";
                helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast, and kmer, default blast. \n";
                helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true.  \n";
+               helpString += "The blastlocation parameter allows you to specify the location of your blast executable. By default mothur will look in ./blast/bin relative to mothur's executable.  \n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The chimera.slayer command should be in the following format: \n";
                helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
                helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
@@ -109,6 +114,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(){
 ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -296,12 +302,29 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                        
+                       string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
                        
                        string path;
                        it = parameters.find("reference");
                        //user has given a template file
                        if(it != parameters.end()){ 
-                               if (it->second == "self") { templatefile = "self"; }
+                               if (it->second == "self") { 
+                                       templatefile = "self"; 
+                                       if (save) {
+                                               m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
+                                               m->mothurOutEndLine();
+                                               save = false;
+                                       }
+                               }
                                else {
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
@@ -309,14 +332,34 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                                        
                                        templatefile = validParameter.validFile(parameters, "reference", true);
                                        if (templatefile == "not open") { abort = true; }
-                                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.slayer command, unless and namefile is given."); m->mothurOutEndLine(); abort = true;  }    
+                                       else if (templatefile == "not found") { //check for saved reference sequences
+                                               if (rdb->referenceSeqs.size() != 0) {
+                                                       templatefile = "saved";
+                                               }else {
+                                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                                       m->mothurOutEndLine();
+                                                       abort = true; 
+                                               }
+                                       }else { if (save) {     rdb->setSavedReference(templatefile);   }       }       
                                }
-                       }else if (hasName) {  templatefile = "self"; }
-                       else { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true;  }   
+                       }else if (hasName) {  templatefile = "self"; 
+                               if (save) {
+                                       m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
+                                       m->mothurOutEndLine();
+                                       save = false;
+                               }
+                       }
+                       else { 
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       templatefile = ""; abort = true; 
+                               } 
+                       }
+                       
                        
-                       string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
-                       m->setProcessors(temp);
-                       convert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
                        convert(temp, ksize);
@@ -367,8 +410,47 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "15"; }               
                        convert(temp, numwanted);
+                       
+                       blastlocation = validParameter.validFile(parameters, "blastlocation", false);   
+                       if (blastlocation == "not found") { blastlocation = ""; }
+                       else {
+                               //add / to name if needed
+                               string lastChar = blastlocation.substr(blastlocation.length()-1);
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if (lastChar != "/") { blastlocation += "/"; }
+#else
+                               if (lastChar != "\\") { blastlocation += "\\"; }        
+#endif
+                               blastlocation = m->getFullPathName(blastlocation);
+                               string formatdbCommand = "";
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               formatdbCommand = blastlocation + "formatdb";   
+#else
+                               formatdbCommand = blastlocation + "formatdb.exe";
+#endif
+                               
+                               //test to make sure formatdb exists
+                               ifstream in;
+                               formatdbCommand = m->getFullPathName(formatdbCommand);
+                               int ableToOpen = m->openInputFile(formatdbCommand, in, "no error"); in.close();
+                               if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + formatdbCommand + " file does not exist. mothur requires formatdb.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
+                               
+                               string blastCommand = "";
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               blastCommand = blastlocation + "megablast";     
+#else
+                               blastCommand = blastlocation + "megablast.exe";
+#endif
+                               //test to make sure formatdb exists
+                               ifstream in2;
+                               blastCommand = m->getFullPathName(blastCommand);
+                               ableToOpen = m->openInputFile(blastCommand, in2, "no error"); in2.close();
+                               if(ableToOpen == 1) {   m->mothurOut("[ERROR]: " + blastCommand + " file does not exist. mothur requires blastall.exe to run chimera.slayer."); m->mothurOutEndLine(); abort = true; }
+                       }
 
                        if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
+                       
+                       if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                }
        }
        catch(exception& e) {
@@ -381,7 +463,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
 int ChimeraSlayerCommand::execute(){
        try{
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-               
+                       
                for (int s = 0; s < fastaFileNames.size(); s++) {
                                
                        m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
@@ -389,7 +471,7 @@ int ChimeraSlayerCommand::execute(){
                        int start = time(NULL); 
                        
                        if (templatefile != "self") { //you want to run slayer with a refernce template
-                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);     
+                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation);      
                        }else {
                                if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
                                string nameFile = "";
@@ -421,9 +503,9 @@ int ChimeraSlayerCommand::execute(){
                                map<string, int> priority = sortFastaFile(fastaFileNames[s], nameFile);
                                m->mothurOut("Done."); m->mothurOutEndLine();
                                
-                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); }  return 0;    }
+                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }
 
-                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);   
+                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation);    
                        }
                                
                        if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
@@ -431,7 +513,7 @@ int ChimeraSlayerCommand::execute(){
                        string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "slayer.accnos";
                        string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "slayer.fasta";
                        
-                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
+                       if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        }  return 0;    }
                        
                        if (chimera->getUnaligned()) { 
                                m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
@@ -474,7 +556,7 @@ int ChimeraSlayerCommand::execute(){
                                MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
                                if (trim) { MPI_File_open(MPI_COMM_WORLD, outFastaFilename, outMode, MPI_INFO_NULL, &outMPIFasta); }
 
-                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }   delete chimera; return 0;  }
+                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) {  MPI_File_close(&outMPIFasta);  } MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }   delete chimera; return 0;  }
                        
                                if (pid == 0) { //you are the root process 
                                        m->mothurOutEndLine();
@@ -523,7 +605,7 @@ int ChimeraSlayerCommand::execute(){
                                        
                                        if (numSeqs == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
                                        
-                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  delete chimera; return 0;  }
+                                       if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  delete chimera; return 0;  }
 
                                }else{ //you are a child process
                                        if (templatefile != "self") { //if template=self we can only use 1 processor
@@ -542,7 +624,7 @@ int ChimeraSlayerCommand::execute(){
                                                int numNoParents = chimera->getNumNoParents();
                                                MPI_Send(&numNoParents, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
                                        
-                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
+                                               if (m->control_pressed) { outputTypes.clear();  MPI_File_close(&inMPI);  MPI_File_close(&outMPI); if (trim) { MPI_File_close(&outMPIFasta); }  MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  delete chimera; return 0;  }
                                
                                        }
                                }
@@ -576,7 +658,7 @@ int ChimeraSlayerCommand::execute(){
                                        int numNoParents = chimera->getNumNoParents();
                                        if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
                                        
-                                       if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
                                        processIDS.resize(0);
@@ -590,23 +672,23 @@ int ChimeraSlayerCommand::execute(){
                                        //append output files
                                        for(int i=1;i<processors;i++){
                                                m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
-                                               remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
                                        }
                                        
                                        //append output files
                                        for(int i=1;i<processors;i++){
                                                m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
-                                               remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
                                        }
                                        
                                        if (trim) {
                                                for(int i=1;i<processors;i++){
                                                        m->appendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName);
-                                                       remove((trimFastaFileName + toString(processIDS[i]) + ".temp").c_str());
+                                                       m->mothurRemove((trimFastaFileName + toString(processIDS[i]) + ".temp"));
                                                }
                                        }
                                        
-                                       if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                                       if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                }
 
                        #else
@@ -616,13 +698,13 @@ int ChimeraSlayerCommand::execute(){
                                if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
 
                                
-                               if (m->control_pressed) { outputTypes.clear(); if (trim) { remove(trimFastaFileName.c_str()); } remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {      remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
+                               if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                
                        #endif
                        
                        m->appendFiles(outputFileName, tempHeader);
                
-                       remove(outputFileName.c_str());
+                       m->mothurRemove(outputFileName);
                        rename(tempHeader.c_str(), outputFileName.c_str());
                        
                #endif
@@ -951,7 +1033,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                        string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; }
-                       in.close(); remove(tempFile.c_str());
+                       in.close(); m->mothurRemove(tempFile);
                }
                
                if (num == numNoParents) {  m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }