X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimerauchimecommand.cpp;h=be0421a73097bc47aa98934ba4ae59a320e02010;hb=791f42d21a85f794529fd4c912dcc27d873c25e8;hp=2b24fd8ed7ccc3fba78fa08d25c6c77a67506229;hpb=4f2c7f477a1ef2d60a1c0c84ab1ba8243af67f87;p=mothur.git diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 2b24fd8..be0421a 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -12,7 +12,7 @@ //#include "uc.h" #include "sequence.hpp" #include "referencedb.h" - +#include "systemcommand.h" //********************************************************************************************************************** vector ChimeraUchimeCommand::setParameters(){ @@ -418,7 +418,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; } @@ -461,18 +461,46 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { path = path.substr(0, (tempPath.find_last_of('m'))); string uchimeCommand; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability + if (m->debug) { + m->mothurOut("[DEBUG]: Uchime location using \"which uchime\" = "); + Command* newCommand = new SystemCommand("which uchime"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + m->mothurOut("[DEBUG]: Mothur's location using \"which mothur\" = "); + newCommand = new SystemCommand("which mothur"); m->mothurOutEndLine(); + newCommand->execute(); + delete newCommand; + } #else uchimeCommand = path + "uchime.exe"; #endif - + //test to make sure uchime exists ifstream in; uchimeCommand = m->getFullPathName(uchimeCommand); int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close(); - if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } - } + if(ableToOpen == 1) { + m->mothurOut(uchimeCommand + " file does not exist. Checking path... \n"); + //check to see if uchime is in the path?? + + string uLocation = m->findProgramPath("uchime"); + + + ifstream in2; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + ableToOpen = m->openInputFile(uLocation, in2, "no error"); in2.close(); +#else + ableToOpen = m->openInputFile((uLocation + ".exe"), in2, "no error"); in2.close(); +#endif + + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uLocation + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } + else { m->mothurOut("Found uchime in your path, using " + uLocation + "\n");uchimeLocation = uLocation; } + }else { uchimeLocation = uchimeCommand; } + + uchimeLocation = m->getFullPathName(uchimeLocation); + } } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "ChimeraSlayerCommand"); @@ -509,7 +537,7 @@ int ChimeraUchimeCommand::execute(){ if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; }else { nameFile = getNamesFile(fastaFileNames[s]); } - + map seqs; readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } @@ -543,12 +571,9 @@ int ChimeraUchimeCommand::execute(){ if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); } int totalSeqs = 0; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); } - else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups); } - #else - totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); - #endif + else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); } + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName); @@ -563,12 +588,19 @@ int ChimeraUchimeCommand::execute(){ int numSeqs = 0; int numChimeras = 0; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); } else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); } - #else - numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); - #endif + + //add headings + ofstream out; + m->openOutputFile(outputFileName+".temp", out); + out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; + out.close(); + + m->appendFiles(outputFileName, outputFileName+".temp"); + m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } //remove file made for uchime @@ -656,6 +688,7 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp ofstream out; m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; float temp1; string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag; @@ -922,14 +955,15 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){ string inputString = "fasta=" + inputFile; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - + m->mothurCalling = true; + Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; - + m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); nameFile = filenames["name"][0]; @@ -982,23 +1016,29 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){ try { + + outputFName = m->getFullPathName(outputFName); + filename = m->getFullPathName(filename); + alns = m->getFullPathName(alns); + //to allow for spaces in the path outputFName = "\"" + outputFName + "\""; filename = "\"" + filename + "\""; alns = "\"" + alns + "\""; vector cPara; - - char* tempUchime; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - tempUchime= new char[10]; - *tempUchime = '\0'; - strncat(tempUchime, "./uchime ", 9); + + string uchimeCommand = uchimeLocation; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + uchimeCommand += " "; #else - tempUchime= new char[8]; - *tempUchime = '\0'; - strncat(tempUchime, "uchime ", 7); + uchimeCommand = "\"" + uchimeCommand + "\""; #endif + + char* tempUchime; + tempUchime= new char[uchimeCommand.length()+1]; + *tempUchime = '\0'; + strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length()); cPara.push_back(tempUchime); char* tempIn = new char[8]; @@ -1226,6 +1266,11 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc //uchime_main(numArgs, uchimeParameters); //cout << "commandString = " << commandString << endl; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else + commandString = "\"" + commandString + "\""; +#endif + if (m->debug) { m->mothurOut("[DEBUG]: uchime command = " + commandString + ".\n"); } system(commandString.c_str()); //free memory @@ -1286,9 +1331,10 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename processIDS.clear(); int process = 1; int num = 0; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - //break up file into multiple files vector files; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //break up file into multiple files m->divideFile(filename, processors, files); if (m->control_pressed) { return 0; } @@ -1341,10 +1387,92 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } in.close(); m->mothurRemove(tempFile); } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the preClusterData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + //divide file + int count = 0; + int spot = 0; + map filehandles; + map::iterator it3; + + ofstream* temp; + for (int i = 0; i < processors; i++) { + temp = new ofstream; + filehandles[i] = temp; + m->openOutputFile(filename+toString(i)+".temp", *(temp)); + files.push_back(filename+toString(i)+".temp"); + } + ifstream in; + m->openInputFile(filename, in); + + while(!in.eof()) { + + if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; } + + Sequence tempSeq(in); m->gobble(in); + + if (tempSeq.getName() != "") { + tempSeq.printSequence(*(filehandles[spot])); + spot++; count++; + if (spot == processors) { spot = 0; } + } + } + in.close(); + + //delete memory + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(it3->second)).close(); + delete it3->second; + } + + //sanity check for number of processors + if (count < processors) { processors = count; } + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction + + //Create processor worker threads. + for( int i=1; isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); + + pDataArray.push_back(tempUchime); + processIDS.push_back(i); + + //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + + //using the main process as a worker saves time and memory + num = driver(outputFileName, files[0], accnos, alns, numChimeras); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + numChimeras += pDataArray[i]->numChimeras; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif //append output files - for(int i=0;iappendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp")); @@ -1359,7 +1487,6 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename //get rid of the file pieces. for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); } -#endif return num; } catch(exception& e) { @@ -1369,7 +1496,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } /**************************************************************************************************/ -int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector groups) { +int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector groups, string nameFile, string groupFile, string fastaFile) { try { processIDS.clear(); @@ -1389,7 +1516,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o lines.push_back(linePair(startIndex, endIndex)); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -1424,7 +1551,6 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o int temp = processIDS[i]; wait(&temp); } -#endif for (int i = 0; i < processIDS.size(); i++) { ifstream in; @@ -1433,10 +1559,52 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } in.close(); m->mothurRemove(tempFile); } + +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the uchimeData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); + + pDataArray.push_back(tempUchime); + processIDS.push_back(i); + + //MyUchimeThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + + //using the main process as a worker saves time and memory + num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + //append output files - for(int i=0;iappendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName); m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));