]> git.donarmstrong.com Git - mothur.git/blobdiff - chimerauchimecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / chimerauchimecommand.cpp
index 2b24fd8ed7ccc3fba78fa08d25c6c77a67506229..f238094c9563587be29064a8f3fdbce54314c0f9 100644 (file)
@@ -418,7 +418,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                                
                        string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                        
                        abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){     useAbskew = false;  abskew = "1.9";     }else{  useAbskew = true;  }
                        if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; }
@@ -461,7 +461,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        path = path.substr(0, (tempPath.find_last_of('m')));
                        
                        string uchimeCommand;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                        uchimeCommand = path + "uchime";        //      format the database, -o option gives us the ability
 #else
                        uchimeCommand = path + "uchime.exe";
@@ -509,7 +509,7 @@ int ChimeraUchimeCommand::execute(){
                                if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
                                        nameFile = nameFileNames[s];
                                }else { nameFile = getNamesFile(fastaFileNames[s]); }
-                                                                       
+                                                                               
                                map<string, string> seqs;  
                                readFasta(fastaFileNames[s], seqs);  if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  return 0; }
 
@@ -543,12 +543,9 @@ int ChimeraUchimeCommand::execute(){
                                if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); }
                                int totalSeqs = 0;
                                
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1)     {       totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);     }
                                else                            {       totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups);                      }
-       #else
-                               totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups);
-       #endif
+
                                if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        }  return 0;    }                               
 
                                int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName);
@@ -563,12 +560,19 @@ int ChimeraUchimeCommand::execute(){
                        
                                int numSeqs = 0;
                                int numChimeras = 0;
-       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+
                                if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
                                else{   numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); }
-       #else
-                               numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras);
-       #endif
+                               
+                               //add headings
+                               ofstream out;
+                               m->openOutputFile(outputFileName+".temp", out); 
+                               out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
+                               out.close();
+                               
+                               m->appendFiles(outputFileName, outputFileName+".temp");
+                               m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str());
+                               
                                if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        m->mothurRemove(outputNames[j]);        } return 0; }
                        
                                //remove file made for uchime
@@ -656,6 +660,7 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp
                
                ofstream out;
                m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n";
                
                float temp1;
                string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag;
@@ -922,14 +927,15 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){
                string inputString = "fasta=" + inputFile;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
-               
+               m->mothurCalling = true;
+        
                Command* uniqueCommand = new DeconvoluteCommand(inputString);
                uniqueCommand->execute();
                
                map<string, vector<string> > filenames = uniqueCommand->getOutputFiles();
                
                delete uniqueCommand;
-               
+               m->mothurCalling = false;
                m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                
                nameFile = filenames["name"][0];
@@ -982,23 +988,35 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam
 
 int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){
        try {
+               
+               outputFName = m->getFullPathName(outputFName);
+               filename = m->getFullPathName(filename);
+               alns = m->getFullPathName(alns);
+               
                //to allow for spaces in the path
                outputFName = "\"" + outputFName + "\"";
                filename = "\"" + filename + "\"";
                alns = "\"" + alns + "\"";
                                
                vector<char*> cPara;
-       
-               char* tempUchime;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               tempUchime= new char[10];  
-               *tempUchime = '\0';
-               strncat(tempUchime, "./uchime ", 9); 
+               
+               string path = m->argv;
+               string tempPath = path;
+               for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); }
+               path = path.substr(0, (tempPath.find_last_of('m')));
+               
+               string uchimeCommand = path;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               uchimeCommand += "uchime ";
 #else
-               tempUchime= new char[8]; 
-               *tempUchime = '\0';
-               strncat(tempUchime, "uchime ", 7); 
+               uchimeCommand += "uchime";
+               uchimeCommand = "\"" + uchimeCommand + "\"";
 #endif
+               
+               char* tempUchime;
+               tempUchime= new char[uchimeCommand.length()+1]; 
+               *tempUchime = '\0';
+               strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length());
                cPara.push_back(tempUchime);
                
                char* tempIn = new char[8]; 
@@ -1226,6 +1244,10 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                
                //uchime_main(numArgs, uchimeParameters); 
                //cout << "commandString = " << commandString << endl;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+               commandString = "\"" + commandString + "\"";
+#endif
                system(commandString.c_str());
                
                //free memory
@@ -1286,9 +1308,10 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                processIDS.clear();
                int process = 1;
                int num = 0;
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
-               //break up file into multiple files
                vector<string> files;
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
+               //break up file into multiple files
                m->divideFile(filename, processors, files);
                
                if (m->control_pressed) {  return 0;  }
@@ -1341,10 +1364,92 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                        }
                        in.close(); m->mothurRemove(tempFile);
                }
+#else
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the preClusterData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               //divide file
+               int count = 0;
+               int spot = 0;
+               map<int, ofstream*> filehandles;
+               map<int, ofstream*>::iterator it3;
+               
+               ofstream* temp;
+               for (int i = 0; i < processors; i++) {
+                       temp = new ofstream;
+                       filehandles[i] = temp;
+                       m->openOutputFile(filename+toString(i)+".temp", *(temp));
+                       files.push_back(filename+toString(i)+".temp");
+               }
+               
+               ifstream in;
+               m->openInputFile(filename, in);
+               
+               while(!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; }
+                       
+                       Sequence tempSeq(in); m->gobble(in); 
+                       
+                       if (tempSeq.getName() != "") {
+                               tempSeq.printSequence(*(filehandles[spot])); 
+                               spot++; count++;
+                               if (spot == processors) { spot = 0; }
+                       }
+               }
+               in.close();
+               
+               //delete memory
+               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
+                       (*(it3->second)).close();
+                       delete it3->second;
+               }
+               
+               //sanity check for number of processors
+               if (count < processors) { processors = count; }
+               
+               vector<uchimeData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               vector<string> dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = toString(i) + ".temp";
+                       
+                       uchimeData* tempUchime = new uchimeData(outputFileName+extension, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
+                       tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       
+                       pDataArray.push_back(tempUchime);
+                       processIDS.push_back(i);
+                       
+                       //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+               }
                
                
+               //using the main process as a worker saves time and memory
+               num = driver(outputFileName, files[0], accnos, alns, numChimeras);
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+                       numChimeras += pDataArray[i]->numChimeras;
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+#endif         
+               
                //append output files
-               for(int i=0;i<processIDS[i];i++){
+               for(int i=0;i<processIDS.size();i++){
                        m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
                        m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
                        
@@ -1359,7 +1464,6 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                
                //get rid of the file pieces.
                for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); }
-#endif         
                return num;     
        }
        catch(exception& e) {
@@ -1389,7 +1493,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                        lines.push_back(linePair(startIndex, endIndex));
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)          
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)         
                                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -1424,7 +1528,6 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                        int temp = processIDS[i];
                        wait(&temp);
                }
-#endif         
                
                for (int i = 0; i < processIDS.size(); i++) {
                        ifstream in;
@@ -1433,10 +1536,52 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o
                        if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
                        in.close(); m->mothurRemove(tempFile);
                }
+                               
+#else
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the uchimeData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<uchimeData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=1; i<processors; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = toString(i) + ".temp";
+                       
+                       uchimeData* tempUchime = new uchimeData(outputFName+extension, templatefile, filename+extension, fastafile, namefile, groupfile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
+                       tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       
+                       pDataArray.push_back(tempUchime);
+                       processIDS.push_back(i);
+                       
+                       //MyUchimeThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+               }
+               
+               
+               //using the main process as a worker saves time and memory
+               num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups);
                
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
                
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
+#endif         
+               
+                               
                //append output files
-               for(int i=0;i<processIDS[i];i++){
+               for(int i=0;i<processIDS.size();i++){
                        m->appendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName);
                        m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));