git.donarmstrong.com Git - mothur.git/blobdiff - chimeracheckcommand.cpp
working on windows parallelization, added trimOligos class to be used by trim.flows...
[mothur.git] / chimeracheckcommand.cpp
index 5b249d7ff6892a53ce0493c3fa5319468681a8bd..3a530fde0ab5b31c71cbe19437984bb8a75578c1 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "chimeracheckcommand.h"
+#include "referencedb.h"
 
 //**********************************************************************************************************************
 vector<string> ChimeraCheckCommand::setParameters(){   
@@ -21,7 +22,8 @@ vector<string> ChimeraCheckCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -50,6 +52,7 @@ string ChimeraCheckCommand::getHelpString(){
                helpString += "The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n";
                helpString += "The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n";
                helpString += "You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The chimera.check command should be in the following format: \n";
                helpString += "chimera.check(fasta=yourFastaFile, reference=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n";
                helpString += "Example: chimera.check(fasta=AD.fasta, reference=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n";
@@ -77,7 +80,8 @@ ChimeraCheckCommand::ChimeraCheckCommand(){
 //***************************************************************************************************************
 ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
        try {
-               abort = false; calledHelp = false;   
+               abort = false; calledHelp = false;  
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -195,10 +199,6 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
-
-                       templatefile = validParameter.validFile(parameters, "reference", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true;  }   
                        
                        namefile = validParameter.validFile(parameters, "name", false);
                        if (namefile == "not found") { namefile = ""; }
@@ -283,6 +283,28 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
                        m->setProcessors(temp);
                        convert(temp, processors);
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templatefile = validParameter.validFile(parameters, "reference", true);
+                       if (templatefile == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templatefile == "not open") { abort = true; } 
+                       else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
+                       
+                       
                        temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
                        convert(temp, ksize);
                        
@@ -327,7 +349,7 @@ int ChimeraCheckCommand::execute(){
                
                                int pid, numSeqsPerProcessor; 
                                int tag = 2001;
-                               vector<unsigned long int> MPIPos;
+                               vector<unsigned long long> MPIPos;
                                
                                MPI_Status status; 
                                MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
@@ -348,7 +370,7 @@ int ChimeraCheckCommand::execute(){
                                MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                                MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
                                
-                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0;  }
+                               if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        } outputTypes.clear(); delete chimera; return 0;  }
                                
                                if (pid == 0) { //you are the root process 
                                        MPIPos = m->setFilePosFasta(fastaFileNames[i], numSeqs); //fills MPIPos, returns numSeqs
@@ -368,7 +390,7 @@ int ChimeraCheckCommand::execute(){
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
                                        
-                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); }   outputTypes.clear(); delete chimera; return 0;  }
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  for (int j = 0; j < outputNames.size(); j++) {    m->mothurRemove(outputNames[j]);        }   outputTypes.clear(); delete chimera; return 0;  }
                                        
                                        //wait on chidren
                                        for(int j = 1; j < processors; j++) { 
@@ -388,7 +410,7 @@ int ChimeraCheckCommand::execute(){
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
                                        
-                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  outputTypes.clear(); delete chimera; return 0;  }
+                                       if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  outputTypes.clear(); delete chimera; return 0;  }
                                        
                                        //tell parent you are done.
                                        char buf[5];
@@ -402,7 +424,7 @@ int ChimeraCheckCommand::execute(){
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
                        
-                       vector<unsigned long int> positions = m->divideFile(fastaFileNames[i], processors);
+                       vector<unsigned long long> positions = m->divideFile(fastaFileNames[i], processors);
                                
                        for (int s = 0; s < (positions.size()-1); s++) {
                                lines.push_back(new linePair(positions[s], positions[(s+1)]));
@@ -413,7 +435,7 @@ int ChimeraCheckCommand::execute(){
                                if(processors == 1){
                                        numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
                                        
-                                       if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int j = 0; j < lines.size(); j++) {  delete lines[j];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
+                                       if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } for (int j = 0; j < lines.size(); j++) {  delete lines[j];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
                                                                        
                                }else{
                                        processIDS.resize(0);
@@ -425,11 +447,11 @@ int ChimeraCheckCommand::execute(){
                                        //append output files
                                        for(int j=1;j<processors;j++){
                                                m->appendFiles((outputFileName + toString(processIDS[j]) + ".temp"), outputFileName);
-                                               remove((outputFileName + toString(processIDS[j]) + ".temp").c_str());
+                                               m->mothurRemove((outputFileName + toString(processIDS[j]) + ".temp"));
                                        }
                                        
                                        if (m->control_pressed) { 
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear();
+                                               for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
                                                for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear();
                                                delete chimera;
                                                return 0;
@@ -439,7 +461,7 @@ int ChimeraCheckCommand::execute(){
                        #else
                                numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
                                
-                               if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear(); for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+                               if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear(); for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear(); delete chimera; return 0; }
                        #endif
                #endif          
                        delete chimera;
@@ -498,7 +520,7 @@ int ChimeraCheckCommand::driver(linePair* filePos, string outputFName, string fi
                        delete candidateSeq;
                        
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               unsigned long int pos = inFASTA.tellg();
+                               unsigned long long pos = inFASTA.tellg();
                                if ((pos == -1) || (pos >= filePos->end)) { break; }
                        #else
                                if (inFASTA.eof()) { break; }
@@ -522,7 +544,7 @@ int ChimeraCheckCommand::driver(linePair* filePos, string outputFName, string fi
 }
 //**********************************************************************************************************************
 #ifdef USE_MPI
-int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<unsigned long int>& MPIPos){
+int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<unsigned long long>& MPIPos){
        try {
                MPI_File outAccMPI;
                MPI_Status status; 
@@ -614,7 +636,7 @@ int ChimeraCheckCommand::createProcesses(string outputFileName, string filename)
                        string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
                        m->openInputFile(tempFile, in);
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
-                       in.close(); remove(tempFile.c_str());
+                       in.close(); m->mothurRemove(tempFile);
                }
                
                return num;