]> git.donarmstrong.com Git - mothur.git/blobdiff - shhhercommand.cpp
added threshold parameter to make.contigs command.
[mothur.git] / shhhercommand.cpp
index 5d8263c86e2a1f3eae24da047897e2c642d243eb..508e2a90610d1b3705972a976580d90937358427 100644 (file)
@@ -67,6 +67,13 @@ ShhherCommand::ShhherCommand(){
 
 ShhherCommand::ShhherCommand(string option) {
        try {
+        
+#ifdef USE_MPI
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
+        
+               if(pid == 0){
+#endif
                abort = false; calledHelp = false;   
                
                //allow user to run help
@@ -121,13 +128,15 @@ ShhherCommand::ShhherCommand(string option) {
                                        if (path == "") {       parameters["file"] = inputDir + it->second;             }
                                }
                        }
-                       
-                       
+            
+            //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
+            
                        //check for required parameters
                        flowFileName = validParameter.validFile(parameters, "flow", true);
                        flowFilesFileName = validParameter.validFile(parameters, "file", true);
                        if (flowFileName == "not found" && flowFilesFileName == "not found") {
-                               m->mothurOut("values for either flow or file must be provided for the shhh.seqs command.");
+                               m->mothurOut("values for either flow or file must be provided for the shhh.flows command.");
                                m->mothurOutEndLine();
                                abort = true; 
                        }
@@ -139,13 +148,15 @@ ShhherCommand::ShhherCommand(string option) {
                        }
                        else{
                                ofstream temp;
-
+                
+                string thisoutputDir = m->hasPath(flowFilesFileName); //if user entered a file with a path then preserve it
+                
                                //flow.files = 9 character offset
-                               compositeFASTAFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.fasta";
+                               compositeFASTAFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.fasta";
                                m->openOutputFile(compositeFASTAFileName, temp);
                                temp.close();
                                
-                               compositeNamesFileName = flowFilesFileName.substr(0, flowFilesFileName.length()-10) + "shhh.names";
+                               compositeNamesFileName = thisoutputDir + m->getRootName(m->getSimpleName(flowFilesFileName)) + "shhh.names";
                                m->openOutputFile(compositeNamesFileName, temp);
                                temp.close();
                        }
@@ -207,17 +218,10 @@ ShhherCommand::ShhherCommand(string option) {
                 if (flowFileVector.size() == 0) {  m->mothurOut("[ERROR]: no valid files."); m->mothurOutEndLine(); abort = true; }
             }
             else{
+                if (outputDir == "") { outputDir = m->hasPath(flowFileName); }
                 flowFileVector.push_back(flowFileName);
             }
-
-                       
-                       //if the user changes the output directory command factory will send this info to us in the output parameter 
-                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
-                               outputDir = ""; 
-                               outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
-                       }
-                       
-                       
+               
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
                        string temp;
@@ -314,6 +318,9 @@ ShhherCommand::ShhherCommand(string option) {
                        }
                        
                }
+#ifdef USE_MPI
+               }                               
+#endif
        }
        catch(exception& e) {
                m->errorOut(e, "ShhherCommand", "ShhherCommand");
@@ -328,9 +335,7 @@ int ShhherCommand::execute(){
                
                int tag = 1976;
                MPI_Status status; 
-               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-               MPI_Comm_size(MPI_COMM_WORLD, &ncpus);
-        
+                       
                if(pid == 0){
 
                        for(int i=1;i<ncpus;i++){
@@ -344,6 +349,22 @@ int ShhherCommand::execute(){
                        getSingleLookUp();      if (m->control_pressed) { return 0; }
                        getJointLookUp();       if (m->control_pressed) { return 0; }
                        
+            vector<string> flowFileVector;
+                       if(flowFilesFileName != "not found"){
+                               string fName;
+                
+                               ifstream flowFilesFile;
+                               m->openInputFile(flowFilesFileName, flowFilesFile);
+                               while(flowFilesFile){
+                                       fName = m->getline(flowFilesFile);
+                                       flowFileVector.push_back(fName);
+                                       m->gobble(flowFilesFile);
+                               }
+                       }
+                       else{
+                               flowFileVector.push_back(flowFileName);
+                       }
+            
                        int numFiles = flowFileVector.size();
 
                        for(int i=1;i<ncpus;i++){
@@ -410,6 +431,8 @@ int ShhherCommand::execute(){
                                
                                if (m->control_pressed) { break; }
                                
+                
+                
                                getOTUData(listFileName);
 
                                m->mothurRemove(distFileName);
@@ -422,6 +445,7 @@ int ShhherCommand::execute(){
 
                                if (m->control_pressed) { break; }
                                
+            
                                for(int i=1;i<ncpus;i++){
                                        MPI_Send(&numOTUs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
                                        MPI_Send(&singleLookUp[0], singleLookUp.size(), MPI_DOUBLE, i, tag, MPI_COMM_WORLD);
@@ -719,6 +743,36 @@ int ShhherCommand::execute(){
        }
 }
 /**************************************************************************************************/
+string ShhherCommand::createNamesFile(){
+       try{
+               
+               vector<string> duplicateNames(numUniques, "");
+               for(int i=0;i<numSeqs;i++){
+                       duplicateNames[mapSeqToUnique[i]] += seqNameVector[i] + ',';
+               }
+               
+               string nameFileName = flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+               
+               ofstream nameFile;
+               m->openOutputFile(nameFileName, nameFile);
+               
+               for(int i=0;i<numUniques;i++){
+                       
+                       if (m->control_pressed) { break; }
+                       
+            //                 nameFile << seqNameVector[mapUniqueToSeq[i]] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+                       nameFile << mapUniqueToSeq[i] << '\t' << duplicateNames[i].substr(0, duplicateNames[i].find_last_of(',')) << endl;
+               }
+               
+               nameFile.close();
+               return  nameFileName;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ShhherCommand", "createNamesFile");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
 string ShhherCommand::flowDistMPI(int startSeq, int stopSeq){
        try{            
@@ -1823,7 +1877,7 @@ int ShhherCommand::execute(){
                
         if (numFiles < processors) { processors = numFiles; }
         
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
         if (processors == 1) { driver(flowFileVector, compositeFASTAFileName, compositeNamesFileName, 0, flowFileVector.size()); }
         else { createProcesses(flowFileVector); } //each processor processes one file
 #else
@@ -1869,7 +1923,7 @@ int ShhherCommand::createProcesses(vector<string> filenames){
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -2397,7 +2451,7 @@ int ShhherCommand::cluster(string filename, string distFileName, string namesFil
                NameAssignment* clusterNameMap = new NameAssignment(namesFileName);
                clusterNameMap->readMap();
                read->read(clusterNameMap);
-               
+        
                ListVector* list = read->getListVector();
                SparseMatrix* matrix = read->getMatrix();
                
@@ -2929,7 +2983,7 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filenam
        
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
                string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.qual";
         
                ofstream qualityFile;
@@ -3012,11 +3066,12 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filenam
                        
                        if(otuCounts[i] > 0){
                                qualityFile << '>' << seqNameVector[mapUniqueToSeq[i]] << endl;
-                               
+                       
                                int j=4;        //need to get past the first four bases
                                while(qualities[i][j] != -1){
-                                       qualityFile << qualities[i][j] << ' ';
-                                       j++;
+                    qualityFile << qualities[i][j] << ' ';
+                    if (j > qualities[i].size()) { break; }
+                    j++;
                                }
                                qualityFile << endl;
                        }
@@ -3036,7 +3091,7 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string filenam
 void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTUs, int numFlowCells, string filename, vector<int> otuCounts, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& centroids){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
                string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.fasta";
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
@@ -3084,7 +3139,7 @@ void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTU
 void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, string filename, vector<int> otuCounts, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
                string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.names";
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
@@ -3122,7 +3177,7 @@ void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, s
 void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& seqNameVector){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
                string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(filename));
                string groupFileName = fileRoot + "shhh.groups";
                ofstream groupFile;
@@ -3147,7 +3202,7 @@ void ShhherCommand::writeGroups(string filename, int numSeqs, vector<string>& se
 void ShhherCommand::writeClusters(string filename, int numOTUs, int numFlowCells, vector<int> otuCounts, vector<int>& centroids, vector<short>& uniqueFlowgrams, vector<string>& seqNameVector, vector<vector<int> >& aaI, vector<int>& nSeqsPerOTU, vector<int>& lengths, vector<short>& flowDataIntI){
        try {
                string thisOutputDir = outputDir;
-               if (outputDir == "") {  thisOutputDir += m->hasPath(filename);  }
+               if (outputDir == "") {  thisOutputDir = m->hasPath(filename);  }
                string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(filename)) + "shhh.counts";
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);