X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=chimerauchimecommand.cpp;h=026e91b01f188ce180c3b645de8451138efe0e02;hb=8a0b2fd6644dee848b262fcdea73b912a6054fed;hp=c035b01d689657c233126b5748b303a04a997356;hpb=8c8acb6218f58f662466e4111ab8aa4da0caf93c;p=mothur.git diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index c035b01..026e91b 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -9,7 +9,7 @@ #include "chimerauchimecommand.h" #include "deconvolutecommand.h" -#include "uc.h" +//#include "uc.h" #include "sequence.hpp" #include "referencedb.h" @@ -418,7 +418,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); abskew = validParameter.validFile(parameters, "abskew", false); if (abskew == "not found"){ useAbskew = false; abskew = "1.9"; }else{ useAbskew = true; } if (useAbskew && templatefile != "self") { m->mothurOut("The abskew parameter is only valid with template=self, ignoring."); m->mothurOutEndLine(); useAbskew = false; } @@ -453,7 +453,25 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } if (hasGroup && (templatefile != "self")) { m->mothurOut("You have provided a group file and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; } - + + //look for uchime exe + path = m->argv; + string tempPath = path; + for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } + path = path.substr(0, (tempPath.find_last_of('m'))); + + string uchimeCommand; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + uchimeCommand = path + "uchime"; // format the database, -o option gives us the ability +#else + uchimeCommand = path + "uchime.exe"; +#endif + + //test to make sure uchime exists + ifstream in; + uchimeCommand = m->getFullPathName(uchimeCommand); + int ableToOpen = m->openInputFile(uchimeCommand, in, "no error"); in.close(); + if(ableToOpen == 1) { m->mothurOut("[ERROR]: " + uchimeCommand + " file does not exist. mothur requires the uchime executable."); m->mothurOutEndLine(); abort = true; } } } catch(exception& e) { @@ -491,7 +509,7 @@ int ChimeraUchimeCommand::execute(){ if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one nameFile = nameFileNames[s]; }else { nameFile = getNamesFile(fastaFileNames[s]); } - + map seqs; readFasta(fastaFileNames[s], seqs); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } @@ -525,12 +543,9 @@ int ChimeraUchimeCommand::execute(){ if (chimealns) { m->openOutputFile(alnsFileName, out2); out2.close(); } int totalSeqs = 0; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1) { totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); } - else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups); } - #else - totalSeqs = driverGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, 0, groups.size(), groups); - #endif + else { totalSeqs = createProcessesGroups(parser, outputFileName, newFasta, accnosFileName, alnsFileName, groups, nameFile, groupFile, fastaFileNames[s]); } + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } int totalChimeras = deconvoluteResults(parser, outputFileName, accnosFileName, alnsFileName); @@ -545,12 +560,19 @@ int ChimeraUchimeCommand::execute(){ int numSeqs = 0; int numChimeras = 0; - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); } else{ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); } - #else - numSeqs = driver(outputFileName, fastaFileNames[s], accnosFileName, alnsFileName, numChimeras); - #endif + + //add headings + ofstream out; + m->openOutputFile(outputFileName+".temp", out); + out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; + out.close(); + + m->appendFiles(outputFileName, outputFileName+".temp"); + m->mothurRemove(outputFileName); rename((outputFileName+".temp").c_str(), outputFileName.c_str()); + if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } //remove file made for uchime @@ -638,6 +660,7 @@ int ChimeraUchimeCommand::deconvoluteResults(SequenceParser& parser, string outp ofstream out; m->openOutputFile(outputFileName+".temp", out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + out << "Score\tQuery\tParentA\tParentB\tIdQM\tIdQA\tIdQB\tIdAB\tIdQT\tLY\tLN\tLA\tRY\tRN\tRA\tDiv\tYN\n"; float temp1; string parent1, parent2, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, flag; @@ -904,14 +927,15 @@ string ChimeraUchimeCommand::getNamesFile(string& inputFile){ string inputString = "fasta=" + inputFile; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - + m->mothurCalling = true; + Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); map > filenames = uniqueCommand->getOutputFiles(); delete uniqueCommand; - + m->mothurCalling = false; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); nameFile = filenames["name"][0]; @@ -965,10 +989,34 @@ int ChimeraUchimeCommand::driverGroups(SequenceParser& parser, string outputFNam int ChimeraUchimeCommand::driver(string outputFName, string filename, string accnos, string alns, int& numChimeras){ try { + outputFName = m->getFullPathName(outputFName); + filename = m->getFullPathName(filename); + alns = m->getFullPathName(alns); + + //to allow for spaces in the path + outputFName = "\"" + outputFName + "\""; + filename = "\"" + filename + "\""; + alns = "\"" + alns + "\""; + vector cPara; - char* tempUchime = new char[8]; - strcpy(tempUchime, "./uchime "); + string path = m->argv; + string tempPath = path; + for (int i = 0; i < path.length(); i++) { tempPath[i] = tolower(path[i]); } + path = path.substr(0, (tempPath.find_last_of('m'))); + + string uchimeCommand = path; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + uchimeCommand += "uchime "; +#else + uchimeCommand += "uchime"; + uchimeCommand = "\"" + uchimeCommand + "\""; +#endif + + char* tempUchime; + tempUchime= new char[uchimeCommand.length()+1]; + *tempUchime = '\0'; + strncat(tempUchime, uchimeCommand.c_str(), uchimeCommand.length()); cPara.push_back(tempUchime); char* tempIn = new char[8]; @@ -1190,15 +1238,27 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc char** uchimeParameters; uchimeParameters = new char*[cPara.size()]; - for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; } - int numArgs = cPara.size(); - - uchime_main(numArgs, uchimeParameters); + string commandString = ""; + for (int i = 0; i < cPara.size(); i++) { uchimeParameters[i] = cPara[i]; commandString += toString(cPara[i]) + " "; } + //int numArgs = cPara.size(); + + //uchime_main(numArgs, uchimeParameters); + //cout << "commandString = " << commandString << endl; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else + commandString = "\"" + commandString + "\""; +#endif + system(commandString.c_str()); //free memory - for(int i = 0; i < cPara.size(); i++) { delete[] cPara[i]; } + for(int i = 0; i < cPara.size(); i++) { delete cPara[i]; } delete[] uchimeParameters; + //remove "" from filenames + outputFName = outputFName.substr(1, outputFName.length()-2); + filename = filename.substr(1, filename.length()-2); + alns = alns.substr(1, alns.length()-2); + if (m->control_pressed) { return 0; } //create accnos file from uchime results @@ -1248,9 +1308,10 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename processIDS.clear(); int process = 1; int num = 0; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - //break up file into multiple files vector files; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + //break up file into multiple files m->divideFile(filename, processors, files); if (m->control_pressed) { return 0; } @@ -1303,10 +1364,92 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } in.close(); m->mothurRemove(tempFile); } +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the preClusterData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + //divide file + int count = 0; + int spot = 0; + map filehandles; + map::iterator it3; + + ofstream* temp; + for (int i = 0; i < processors; i++) { + temp = new ofstream; + filehandles[i] = temp; + m->openOutputFile(filename+toString(i)+".temp", *(temp)); + files.push_back(filename+toString(i)+".temp"); + } + + ifstream in; + m->openInputFile(filename, in); + + while(!in.eof()) { + + if (m->control_pressed) { in.close(); for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { (*(it3->second)).close(); delete it3->second; } return 0; } + + Sequence tempSeq(in); m->gobble(in); + + if (tempSeq.getName() != "") { + tempSeq.printSequence(*(filehandles[spot])); + spot++; count++; + if (spot == processors) { spot = 0; } + } + } + in.close(); + + //delete memory + for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { + (*(it3->second)).close(); + delete it3->second; + } + + //sanity check for number of processors + if (count < processors) { processors = count; } + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + vector dummy; //used so that we can use the same struct for MyUchimeSeqsThreadFunction and MyUchimeThreadFunction + + //Create processor worker threads. + for( int i=1; isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); + + pDataArray.push_back(tempUchime); + processIDS.push_back(i); + + //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeSeqsThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + + //using the main process as a worker saves time and memory + num = driver(outputFileName, files[0], accnos, alns, numChimeras); + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + numChimeras += pDataArray[i]->numChimeras; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif //append output files - for(int i=0;iappendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp")); @@ -1321,7 +1464,6 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename //get rid of the file pieces. for (int i = 0; i < files.size(); i++) { m->mothurRemove(files[i]); } -#endif return num; } catch(exception& e) { @@ -1331,7 +1473,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename } /**************************************************************************************************/ -int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector groups) { +int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string outputFName, string filename, string accnos, string alns, vector groups, string nameFile, string groupFile, string fastaFile) { try { processIDS.clear(); @@ -1351,7 +1493,7 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o lines.push_back(linePair(startIndex, endIndex)); } -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) //loop through and create all the processes you want while (process != processors) { @@ -1386,7 +1528,6 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o int temp = processIDS[i]; wait(&temp); } -#endif for (int i = 0; i < processIDS.size(); i++) { ifstream in; @@ -1395,10 +1536,52 @@ int ChimeraUchimeCommand::createProcessesGroups(SequenceParser& parser, string o if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; } in.close(); m->mothurRemove(tempFile); } + +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the uchimeData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; isetBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); + + pDataArray.push_back(tempUchime); + processIDS.push_back(i); + + //MyUchimeThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i-1] = CreateThread(NULL, 0, MyUchimeThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + + //using the main process as a worker saves time and memory + num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + + //append output files - for(int i=0;iappendFiles((outputFName + toString(processIDS[i]) + ".temp"), outputFName); m->mothurRemove((outputFName + toString(processIDS[i]) + ".temp"));