]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
added check to make sure shhh.flows child processes finish properly. added subsamplin...
[mothur.git] / shhhercommand.cpp
index a09825e4b9a3cd11dbe77b3dd82e703481c650bc..08bb017a4d025113c7317167e3eb6217fa57638d 100644 (file)
@@ -150,14 +150,21 @@ ShhherCommand::ShhherCommand(string option) {
                        else{
                                ofstream temp;
                 
-                string thisoutputDir = m->hasPath(flowFilesFileName); //if user entered a file with a path then preserve it
+                string thisoutputDir = outputDir;
+                if (outputDir == "") {  thisoutputDir =  m->hasPath(flowFilesFileName); } //if user entered a file with a path then preserve it
                 
-                               //flow.files = 9 character offset
-                               compositeFASTAFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.fasta";
+                               //we want to rip off .files, and also .flow if its there
+                string fileroot = m->getRootName(m->getSimpleName(flowFilesFileName));
+                if (fileroot[fileroot.length()-1] == '.') {  fileroot = fileroot.substr(0, fileroot.length()-1); } //rip off dot
+                string extension = m->getExtension(fileroot);
+                if (extension == ".flow") { fileroot = m->getRootName(fileroot);  }
+                else { fileroot += "."; } //add back if needed
+                
+                               compositeFASTAFileName = thisoutputDir + fileroot + "shhh.fasta";
                                m->openOutputFile(compositeFASTAFileName, temp);
                                temp.close();
                                
-                               compositeNamesFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.names";
+                               compositeNamesFileName = thisoutputDir + fileroot + "shhh.names";
                                m->openOutputFile(compositeNamesFileName, temp);
                                temp.close();
                        }
@@ -1927,12 +1934,14 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                
                //divide the groups between the processors
                vector<linePair> lines;
+        vector<int> numFilesToComplete;
                int numFilesPerProcessor = filenames.size() / processors;
                for (int i = 0; i < processors; i++) {
                        int startIndex =  i * numFilesPerProcessor;
                        int endIndex = (i+1) * numFilesPerProcessor;
                        if(i == (processors - 1)){      endIndex = filenames.size();    }
                        lines.push_back(linePair(startIndex, endIndex));
+            numFilesToComplete.push_back((endIndex-startIndex));
                }
                
         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
@@ -1946,6 +1955,14 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                                process++;
                        }else if (pid == 0){
                                num = driver(filenames, compositeFASTAFileName + toString(getpid()) + ".temp", compositeNamesFileName  + toString(getpid()) + ".temp", lines[process].start, lines[process].end);
+                
+                //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = compositeFASTAFileName + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                
                                exit(0);
                        }else { 
                                m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
@@ -2008,6 +2025,18 @@ int ShhherCommand::createProcesses(vector<string> filenames){
         #endif
         
         for (int i=0;i<processIDS.size();i++) { 
+            ifstream in;
+                       string tempFile =  compositeFASTAFileName + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { 
+                int tempNum = 0; 
+                in >> tempNum; 
+                if (tempNum != numFilesToComplete[i+1]) {
+                    m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
+                }
+            }
+                       in.close(); m->mothurRemove(tempFile);
+            
             if (compositeFASTAFileName != "") {
                 m->appendFiles((compositeFASTAFileName + toString(processIDS[i]) + ".temp"), compositeFASTAFileName);
                 m->appendFiles((compositeNamesFileName + toString(processIDS[i]) + ".temp"), compositeNamesFileName);
@@ -2046,18 +2075,24 @@ vector<string> ShhherCommand::parseFlowFiles(string filename){
             out << thisNumFLows << endl;
             files.push_back(outputFileName);
             
+            int numLinesWrote = 0;
             for (int i = 0; i < largeSize; i++) {
                 if (in.eof()) { break; }
-                string line = m->getline(in);
+                string line = m->getline(in); m->gobble(in);
                 out << line << endl;
+                numLinesWrote++;
             }
             out.close();
+            
+            if (numLinesWrote == 0) {  m->mothurRemove(outputFileName); files.pop_back();  }
             count++;
         }
         in.close();
         
         if (m->control_pressed) { for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }  files.clear(); }
         
+        m->mothurOut("\nDivided " + filename + " into " + toString(files.size()) + " files.\n\n"); 
+        
         return files;
     }
        catch(exception& e) {
@@ -2070,6 +2105,8 @@ vector<string> ShhherCommand::parseFlowFiles(string filename){
 int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFileName, string thisCompositeNamesFileName, int start, int end){
     try {
         
+        int numCompleted = 0;
+        
         for(int i=start;i<end;i++){
                        
                        if (m->control_pressed) { break; }
@@ -2263,17 +2300,18 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
                         m->mothurRemove(otuCountsFileName);
                         m->appendFiles(groupFileName, (thisOutputDir + m->getRootName(m->getSimpleName(theseFlowFileNames[0])) + "shhh.groups"));
                         m->mothurRemove(groupFileName);
-                        m->mothurRemove(theseFlowFileNames[g]);
                     }
+                    m->mothurRemove(theseFlowFileNames[g]);
                 }
                        }
             
+            numCompleted++;
                        m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                
         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
         
-        return 0;
+        return numCompleted;
         
     }catch(exception& e) {
             m->errorOut(e, "ShhherCommand", "driver");
@@ -3319,7 +3357,9 @@ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int num
                                                        //otuCountsFile << base;
                                                }
                                        }
-                                       otuCountsFile << newSeq.substr(4) << endl;
+                                       
+                    if (newSeq.length() >= 4) {  otuCountsFile << newSeq.substr(4) << endl;  }
+                    else {  otuCountsFile << "NNNN" << endl;  }
                                }
                                otuCountsFile << endl;
                        }