]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhseqscommand.cpp
changed random forest output filename
[mothur.git] / shhhseqscommand.cpp
index 625b93922c15a72096b2a9bb1550185f14dc579e..82d956189a6f025fd57dc7e901e585456f59f8ab 100644 (file)
 //**********************************************************************************************************************
 vector<string> ShhhSeqsCommand::setParameters(){       
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(psigma);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta-map",false,true,true); parameters.push_back(pfasta);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none","name",false,true,true); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+               CommandParameter psigma("sigma", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(psigma);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -37,7 +37,7 @@ string ShhhSeqsCommand::getHelpString(){
                string helpString = "";
                helpString += "The shhh.seqs command reads a fasta and name file and ....\n";
                helpString += "The shhh.seqs command parameters are fasta, name, group, sigma and processors.\n";
-               helpString += "The fasta parameter allows you to enter the fasta file containing your potentially sequences, and is required, unless you have a valid current fasta file. \n";
+               helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
                helpString += "The name parameter allows you to provide a name file associated with your fasta file. It is required. \n";
                helpString += "The group parameter allows you to provide a group file.  When checking sequences, only sequences from the same group as the query sequence will be used as the reference. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
@@ -54,6 +54,24 @@ string ShhhSeqsCommand::getHelpString(){
                exit(1);
        }
 }
+//**********************************************************************************************************************
+string ShhhSeqsCommand::getOutputPattern(string type) { // returns the output-name pattern string for the given output type ("fasta", "name" or "map")
+    try {
+        string pattern = "";
+        // each known type maps to a "[filename],<suffix>" pattern; NOTE(review): presumably expanded by getOutputFileName() using the "[filename]" variable set in execute() - confirm
+        if (type == "fasta")            {   pattern = "[filename],shhh_seqs.fasta";   }
+        else if (type == "name")    {   pattern = "[filename],shhh_seqs.names";   }
+        else if (type == "map")        {   pattern = "[filename],shhh_seqs.map";   }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        // for an unrecognized type the error is logged, control_pressed is set, and the empty string is returned
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ShhhSeqsCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+
 //**********************************************************************************************************************
 
 ShhhSeqsCommand::ShhhSeqsCommand(){    
@@ -160,11 +178,17 @@ ShhhSeqsCommand::ShhhSeqsCommand(string option) {
                        else {   m->setGroupFile(groupfile);  }
                        
                        string temp     = validParameter.validFile(parameters, "sigma", false);         if(temp == "not found"){        temp = "0.01"; }
-                       convert(temp, sigma); 
-                       
+                       m->mothurConvert(temp, sigma); 
+                       sigma = 1/sigma;
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
        }
        catch(exception& e) {
@@ -178,10 +202,13 @@ int ShhhSeqsCommand::execute() {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               if (outputDir == "") { outputDir = m->hasPath(fastafile);  }//if user entered a file with a path then preserve it                               
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "shhh.fasta";
-               string nameFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.names";
-               string mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.map";
+               if (outputDir == "") { outputDir = m->hasPath(fastafile);  }//if user entered a file with a path then preserve it               
+               
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
+               string outputFileName = getOutputFileName("fasta",variables);
+               string nameFileName = getOutputFileName("name",variables);
+               string mapFileName = getOutputFileName("map",variables);
                
                if (groupfile != "") {
                        //Parse sequences by group
@@ -196,13 +223,16 @@ int ShhhSeqsCommand::execute() {
                        m->openOutputFile(nameFileName, out1); out1.close();
                        mapFileName = outputDir + m->getRootName(m->getSimpleName(fastafile))  + "shhh.";
                        
-                       if(processors == 1)     {       driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups);      }
-                       else                            {       createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups);                       }
+                       vector<string> mapFileNames;
+                       if(processors == 1)     {       mapFileNames = driverGroups(parser, outputFileName, nameFileName, mapFileName, 0, groups.size(), groups);       }
+                       else                            {       mapFileNames = createProcessesGroups(parser, outputFileName, nameFileName, mapFileName, groups);                        }
                        
-                       if (m->control_pressed) {    return 0;  }                               
+                       if (m->control_pressed) {    return 0;  }       
                        
-                       //deconvolute results by running unique.seqs
+                       for (int j = 0; j < mapFileNames.size(); j++) { outputNames.push_back(mapFileNames[j]); outputTypes["map"].push_back(mapFileNames[j]); }
                        
+                       //deconvolute results by running unique.seqs
+                       deconvoluteResults(outputFileName, nameFileName);
                        
                        if (m->control_pressed) {   return 0;   }                               
                        
@@ -227,13 +257,13 @@ int ShhhSeqsCommand::execute() {
                        if (m->control_pressed) { m->mothurRemove(distFileName); return 0; }
                        
                        driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, outputFileName, nameFileName, mapFileName); 
+                       outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
                }
                
                if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                
                outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
                outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName);
-               outputNames.push_back(mapFileName); outputTypes["map"].push_back(mapFileName);
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -335,11 +365,12 @@ int ShhhSeqsCommand::loadData(correctDist* correct, seqNoise& noise, vector<stri
        }
 }
 /**************************************************************************************************/
-int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector<string> groups) {
+vector<string> ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFName, string newNName, string newMName, vector<string> groups) {
        try {
                
                vector<int> processIDS;
                int process = 1;
+               vector<string> mapfileNames;
                
                //sanity check
                if (groups.size() < processors) { processors = groups.size(); }
@@ -354,7 +385,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -364,7 +395,18 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
+                               mapfileNames = driverGroups(parser, newFName + toString(getpid()) + ".temp", newNName + toString(getpid()) + ".temp", newMName, lines[process].start, lines[process].end, groups);
+                               
+                               //pass filenames to parent
+                               ofstream out;
+                               string tempFile = newMName + toString(getpid()) + ".temp";
+                               m->openOutputFile(tempFile, out);
+                               out << mapfileNames.size() << endl;
+                               for (int i = 0; i < mapfileNames.size(); i++) {
+                                       out << mapfileNames[i] << endl;
+                               }
+                               out.close();
+                               
                                exit(0);
                        }else { 
                                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
@@ -374,7 +416,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                }
                
                //do my part
-               driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
+               mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -382,6 +424,22 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                        wait(&temp);
                }
                
+               //append output files
+               for(int i=0;i<processIDS.size();i++){
+                       ifstream in;
+                       string tempFile =  newMName + toString(processIDS[i]) + ".temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { 
+                               int tempNum = 0; in >> tempNum;  m->gobble(in);
+                               for (int j = 0; j < tempNum; j++) {
+                                       string filename;
+                                       in >> filename; m->gobble(in);
+                                       mapfileNames.push_back(filename);
+                               }
+                       }
+                       in.close(); m->mothurRemove(tempFile);
+                       
+               }
 #else
                
                //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -397,7 +455,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                for( int i=1; i<processors; i++ ){
                        // Allocate memory for thread data.
                        string extension = toString(i) + ".temp";
-                       
+
                        shhhseqsData* tempShhhseqs = new shhhseqsData(fastafile, namefile, groupfile, (newFName+extension), (newNName+extension), newMName, groups, m, lines[i].start, lines[i].end, sigma, i);
                        pDataArray.push_back(tempShhhseqs);
                        processIDS.push_back(i);
@@ -409,13 +467,19 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                
                
                //using the main process as a worker saves time and memory
-               driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
+               mapfileNames = driverGroups(parser, newFName, newNName, newMName, lines[0].start, lines[0].end, groups);
                
                //Wait until all threads have terminated.
                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
+                       for (int j = 0; j < pDataArray[i]->mapfileNames.size(); j++) {
+                               mapfileNames.push_back(pDataArray[i]->mapfileNames[j]);
+                       }
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
@@ -431,7 +495,7 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
                        m->mothurRemove((newNName + toString(processIDS[i]) + ".temp"));
                }
                
-               return 0;       
+               return mapfileNames;    
                
        }
        catch(exception& e) {
@@ -440,14 +504,16 @@ int ShhhSeqsCommand::createProcessesGroups(SequenceParser& parser, string newFNa
        }
 }
 /**************************************************************************************************/
-int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
+vector<string> ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, string newNFile, string newMFile, int start, int end, vector<string> groups){
        try {
                
+               vector<string> mapFileNames;
+               
                for (int i = start; i < end; i++) {
                        
                        start = time(NULL);
                        
-                       if (m->control_pressed) {  return 0; }
+                       if (m->control_pressed) {  return mapFileNames; }
                        
                        m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[i] + ":"); m->mothurOutEndLine();
                        
@@ -465,26 +531,27 @@ int ShhhSeqsCommand::driverGroups(SequenceParser& parser, string newFFile, strin
                        
                        //load this groups info in order
                        loadData(correct, noise, sequences, uniqueNames, redundantNames, seqFreq, thisNameMap, thisSeqs);
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { return mapFileNames; }
                        
                        //calc distances for cluster
                        string distFileName = outputDir + m->getRootName(m->getSimpleName(fastafile)) + groups[i] + ".shhh.dist";
                        correct->execute(distFileName);
                        delete correct;
                        
-                       if (m->control_pressed) { m->mothurRemove(distFileName); return 0; }
+                       if (m->control_pressed) { m->mothurRemove(distFileName); return mapFileNames; }
                        
                        driver(noise, sequences, uniqueNames, redundantNames, seqFreq, distFileName, newFFile+groups[i], newNFile+groups[i], newMFile+groups[i]+".map"); 
                        
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { return mapFileNames; }
                        
                        m->appendFiles(newFFile+groups[i], newFFile); m->mothurRemove(newFFile+groups[i]);
                        m->appendFiles(newNFile+groups[i], newNFile); m->mothurRemove(newNFile+groups[i]);
+                       mapFileNames.push_back(newMFile+groups[i]+".map");
                        
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process group " + groups[i] + "."); m->mothurOutEndLine(); 
                }
                
-               return 0;
+               return mapFileNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ShhhSeqsCommand", "driverGroups");
@@ -652,21 +719,22 @@ int ShhhSeqsCommand::deconvoluteResults(string fastaFile, string nameFile){
                string inputString = "fasta=" + fastaFile + ", name=" + nameFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                string newnameFile = filenames["name"][0];
                string newfastaFile = filenames["fasta"][0];
                
                m->mothurRemove(fastaFile); rename(newfastaFile.c_str(), fastaFile.c_str()); 
-               m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); 
+               if (nameFile != newnameFile) { m->mothurRemove(nameFile); rename(newnameFile.c_str(), nameFile.c_str()); }
                
                return 0;
        }