From: Sarah Westcott Date: Fri, 24 Aug 2012 14:55:14 +0000 (-0400) Subject: added sff.multiple command. fixed issue with windows paralellization in chimera... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=f509429e06e545bde69c97cacc0eb436775bd329 added sff.multiple command. fixed issue with windows paralellization in chimera.uchime with count file. --- diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 461c3b6..7ff3989 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -1616,7 +1616,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename string extension = toString(i) + ".temp"; uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); pDataArray.push_back(tempUchime); @@ -1748,7 +1748,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen string extension = toString(i) + ".temp"; uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end, i); - tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract); + tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount); tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract); pDataArray.push_back(tempUchime); @@ -1761,7 +1761,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen //using the main process as a worker saves time and memory - num = driverGroups(parser, outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); + num = driverGroups(outputFName, filename, accnos, alns, lines[0].start, lines[0].end, groups); //Wait until all threads have terminated. WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h index 67c77f2..39c3141 100644 --- a/chimerauchimecommand.h +++ b/chimerauchimecommand.h @@ -190,7 +190,7 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){ if (pDataArray->hasCount) { error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete cparser; return 0; } }else { - error = cparser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete parser; return 0; } + error = parser->getSeqs(pDataArray->groups[i], pDataArray->filename, true); if ((error == 1) || pDataArray->m->control_pressed) { delete parser; return 0; } } //int numSeqs = driver((outputFName + groups[i]), filename, (accnos+ groups[i]), (alns+ groups[i]), numChimeras); @@ -530,15 +530,15 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){ //prepFile(filename, outputFileName); /******************************************/ ifstream in23; - m->openInputFile((filename.substr(1, filename.length()-2)), in23); + pDataArray->m->openInputFile((filename.substr(1, filename.length()-2)), in23); ofstream out23; - m->openOutputFile(outputFileName, out23); + pDataArray->m->openOutputFile(outputFileName, out23); while (!in23.eof()) { - if (m->control_pressed) { break; } + if (pDataArray->m->control_pressed) { break; } - Sequence seq(in23); m->gobble(in23); + Sequence seq(in23); pDataArray->m->gobble(in23); if (seq.getName() != "") { seq.printSequence(out23); } } diff --git a/mothurout.cpp b/mothurout.cpp index 1a3bf79..2debf84 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -939,7 +939,7 @@ string MothurOut::getFullPathName(string fileName){ } for (int i = index; i >= 0; i--) { - newFileName = dirs[i] + "\\" + newFileName; + newFileName = dirs[i] + "\\\\" + newFileName; } return newFileName; @@ -2431,30 +2431,29 @@ void MothurOut::splitAtDash(string& estim, vector& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; for(int i=0;i& container) { try { string individual = ""; int estimLength = estim.size(); + bool prevEscape = false; for(int i=0;i& container) { //This function parses the line options and puts them in a set void MothurOut::splitAtDash(string& estim, set& container) { try { - string individual; + string individual = ""; int lineNum; - - while (estim.find_first_of('-') != -1) { - individual = estim.substr(0,estim.find_first_of('-')); - if ((estim.find_first_of('-')+1) <= estim.length()) { //checks to make sure you don't have dash at end of string - estim = estim.substr(estim.find_first_of('-')+1, estim.length()); - convert(individual, lineNum); //convert the string to int - container.insert(lineNum); + int estimLength = estim.size(); + bool prevEscape = false; + for(int i=0;i uniqueSffNames;// so we don't add the same sff multiple times map::iterator it; set namesToRemove; for(int i=0;imothurRemove(filehandles[i][j]); m->mothurRemove(filehandlesHeaders[i][j]); namesToRemove.insert(filehandles[i][j]); - }else{ - it = uniqueSffNames.find(filehandles[i][j]); - if (it == uniqueSffNames.end()) { - uniqueSffNames[filehandles[i][j]] = barcodeNameVector[i]; - } - } + } } } } diff --git a/sffmultiplecommand.cpp b/sffmultiplecommand.cpp index e9c4784..05bc9aa 100644 --- a/sffmultiplecommand.cpp +++ b/sffmultiplecommand.cpp @@ -7,11 +7,7 @@ // #include "sffmultiplecommand.h" -#include "sffinfocommand.h" -#include "seqsummarycommand.h" -#include "trimflowscommand.h" -#include "shhhercommand.h" -#include "trimseqscommand.h" + //********************************************************************************************************************** @@ -33,7 +29,7 @@ vector SffMultipleCommand::setParameters(){ CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs); CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal); CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise); - CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder); + CommandParameter porder("order", "String", "", "TACG", "", "", "",false,false); parameters.push_back(porder); //shhh.flows CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup); @@ -50,14 +46,7 @@ vector SffMultipleCommand::setParameters(){ CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength); CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength); CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward); - CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim); - CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold); - CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage); - CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage); - CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage); - CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize); - CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize); - CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst); + CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst); CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast); @@ -96,16 +85,8 @@ string SffMultipleCommand::getHelpString(){ helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n"; helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n"; - helpString += "The qfile parameter allows you to provide a quality file.\n"; - helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n"; - helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n"; - helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n"; - helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n"; - helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n"; - helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n"; helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n"; helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n"; - helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n"; helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n"; helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n"; @@ -128,7 +109,10 @@ string SffMultipleCommand::getOutputFileNameTag(string type, string inputName="" it = outputTypes.find(type); if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } else { - m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; + if (type == "fasta") { outputFileName = "fasta"; } + else if (type == "name") { outputFileName = "names"; } + else if (type == "group") { outputFileName = "groups"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } } return outputFileName; } @@ -146,6 +130,8 @@ SffMultipleCommand::SffMultipleCommand(){ setParameters(); vector tempOutNames; outputTypes["fasta"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["group"] = tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["qfile"] = tempOutNames; } @@ -158,7 +144,7 @@ SffMultipleCommand::SffMultipleCommand(){ SffMultipleCommand::SffMultipleCommand(string option) { try { - abort = false; calledHelp = false; + abort = false; calledHelp = false; append=false; makeGroup=false; //allow user to run help if(option == "help") { help(); abort = true; calledHelp = true; } @@ -184,6 +170,9 @@ SffMultipleCommand::SffMultipleCommand(string option) { outputTypes["fasta"] = tempOutNames; outputTypes["flow"] = tempOutNames; outputTypes["qfile"] = tempOutNames; + outputTypes["name"] = tempOutNames; + outputTypes["group"] = tempOutNames; + //if the user changes the output directory command factory will send this info to us in the output parameter outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; } @@ -200,6 +189,14 @@ SffMultipleCommand::SffMultipleCommand(string option) { //if the user has not given a path then, add inputdir. else leave path alone. if (path == "") { parameters["file"] = inputDir + it->second; } } + + it = parameters.find("lookup"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["lookup"] = inputDir + it->second; } + } } filename = validParameter.validFile(parameters, "file", true); @@ -257,7 +254,7 @@ SffMultipleCommand::SffMultipleCommand(string option) { m->mothurConvert(temp, cutoff); temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){ temp = "0.000001"; } - m->mothurConvert(temp, minDelta); + minDelta = temp; temp = validParameter.validFile(parameters, "maxiter", false); if (temp == "not found"){ temp = "1000"; } m->mothurConvert(temp, maxIters); @@ -284,27 +281,6 @@ SffMultipleCommand::SffMultipleCommand(string option) { temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; } m->mothurConvert(temp, maxLength); - temp = validParameter.validFile(parameters, "qthreshold", false); if (temp == "not found") { temp = "0"; } - m->mothurConvert(temp, qThreshold); - - temp = validParameter.validFile(parameters, "qtrim", false); if (temp == "not found") { temp = "t"; } - qtrim = m->isTrue(temp); - - temp = validParameter.validFile(parameters, "rollaverage", false); if (temp == "not found") { temp = "0"; } - convert(temp, qRollAverage); - - temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; } - convert(temp, qWindowAverage); - - temp = validParameter.validFile(parameters, "qwindowsize", false); if (temp == "not found") { temp = "50"; } - convert(temp, qWindowSize); - - temp = validParameter.validFile(parameters, "qstepsize", false); if (temp == "not found") { temp = "1"; } - convert(temp, qWindowStep); - - temp = validParameter.validFile(parameters, "qaverage", false); if (temp == "not found") { temp = "0"; } - convert(temp, qAverage); - temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; } convert(temp, keepFirst); @@ -316,11 +292,76 @@ SffMultipleCommand::SffMultipleCommand(string option) { temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; } keepforward = m->isTrue(temp); - - numFPrimers = 0; - numRPrimers = 0; - numLinkers = 0; - numSpacers = 0; + + temp = validParameter.validFile(parameters, "lookup", true); + if (temp == "not found") { + lookupFileName = "LookUp_Titanium.pat"; + + int ableToOpen; + ifstream in; + ableToOpen = m->openInputFile(lookupFileName, in, "noerror"); + in.close(); + + //if you can't open it, try input location + if (ableToOpen == 1) { + if (inputDir != "") { //default path is set + string tryPath = inputDir + lookupFileName; + m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + lookupFileName = tryPath; + } + } + + //if you can't open it, try default location + if (ableToOpen == 1) { + if (m->getDefaultPath() != "") { //default path is set + string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName); + m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + lookupFileName = tryPath; + } + } + + //if you can't open it its not in current working directory or inputDir, try mothur excutable location + if (ableToOpen == 1) { + string exepath = m->argv; + string tempPath = exepath; + for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); } + exepath = exepath.substr(0, (tempPath.find_last_of('m'))); + + string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName); + m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine(); + ifstream in2; + ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + lookupFileName = tryPath; + } + + if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; } + } + else if(temp == "not open") { + + lookupFileName = validParameter.validFile(parameters, "lookup", false); + + //if you can't open it its not inputDir, try mothur excutable location + string exepath = m->argv; + string tempPath = exepath; + for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); } + exepath = exepath.substr(0, (tempPath.find_last_of('m'))); + + string tryPath = m->getFullPathName(exepath) + lookupFileName; + m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine(); + ifstream in2; + int ableToOpen = m->openInputFile(tryPath, in2, "noerror"); + in2.close(); + lookupFileName = tryPath; + + if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; } + }else { lookupFileName = temp; } } } catch(exception& e) { @@ -336,15 +377,34 @@ int SffMultipleCommand::execute(){ vector sffFiles, oligosFiles; readFile(sffFiles, oligosFiles); + outputDir = m->hasPath(filename); + string fileroot = outputDir + m->getRootName(m->getSimpleName(filename)); + string fasta = fileroot + getOutputFileNameTag("fasta"); + string name = fileroot + getOutputFileNameTag("name"); + string group = fileroot + getOutputFileNameTag("group"); + if (m->control_pressed) { return 0; } if (sffFiles.size() < processors) { processors = sffFiles.size(); } - - if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size()); } - else { createProcesses(sffFiles, oligosFiles); } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else + //trim.flows, shhh.flows cannot handle multiple processors for windows. + processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n"); +#endif + if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); } + else { createProcesses(sffFiles, oligosFiles, fasta, name, group); } if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (append) { + outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta); + m->setFastaFile(fasta); + outputNames.push_back(name); outputTypes["name"].push_back(name); + m->setNameFile(name); + if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); } + } + //report output filenames m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); @@ -364,6 +424,8 @@ int SffMultipleCommand::readFile(vector& sffFiles, vector& oligo ifstream in; m->openInputFile(filename, in); + bool allBlank = true; + bool allFull = true; string oligos, sff; while (!in.eof()) { @@ -372,6 +434,8 @@ int SffMultipleCommand::readFile(vector& sffFiles, vector& oligo in >> sff; + sff = m->getFullPathName(sff); + //ignore file pairing if(sff[0] == '#'){ while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } m->gobble(in); } else { //check for oligos file @@ -384,14 +448,18 @@ int SffMultipleCommand::readFile(vector& sffFiles, vector& oligo else if (c == 32 || c == 9){;} //space or tab else { oligos += c; } } + sffFiles.push_back(sff); + if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false; } + if (oligos == "") { allFull = false; } + oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file } m->gobble(in); - - sffFiles.push_back(sff); - oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file } in.close(); + if (allBlank || allFull) { append = true; } + if (allFull) { makeGroup = true; } + return 0; } catch(exception& e) { @@ -400,12 +468,17 @@ int SffMultipleCommand::readFile(vector& sffFiles, vector& oligo } } //********************************************************************************************************************** -int SffMultipleCommand::driver(vector sffFiles, vector oligosFiles, int start, int end){ +//runs sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs, summary.seqs for each sff file. +int SffMultipleCommand::driver(vector sffFiles, vector oligosFiles, int start, int end, string fasta, string name, string group){ try { + m->mothurRemove(fasta); m->mothurRemove(name); m->mothurRemove(group); int count = 0; - for (int i = start; i < end; i++) { - string sff = sffFiles[i]; - string oligos = oligosFiles[i]; + for (int s = start; s < end; s++) { + + string sff = sffFiles[s]; + string oligos = oligosFiles[s]; + + m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n"); //run sff.info string inputString = "sff=" + sff + ", flow=T"; @@ -417,11 +490,13 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil Command* sffCommand = new SffInfoCommand(inputString); sffCommand->execute(); + if (m->control_pressed){ break; } + map > filenames = sffCommand->getOutputFiles(); delete sffCommand; m->mothurCalling = false; - m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + m->mothurOutEndLine(); //run summary.seqs on the fasta file string fastaFile = ""; @@ -429,19 +504,211 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil if (it != filenames.end()) { if ((it->second).size() != 0) { fastaFile = (it->second)[0]; } } else { m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break; } - inputString = "fasta=" + fastaFile; - m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + inputString = "fasta=" + fastaFile + ", processors=1"; + m->mothurOutEndLine(); m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); m->mothurCalling = true; Command* summarySeqsCommand = new SeqSummaryCommand(inputString); summarySeqsCommand->execute(); + if (m->control_pressed){ break; } + + map > temp = summarySeqsCommand->getOutputFiles(); + mergeOutputFileList(filenames, temp); + + delete summarySeqsCommand; + m->mothurCalling = false; + + m->mothurOutEndLine(); + + //run trim.flows on the fasta file + string flowFile = ""; + it = filenames.find("flow"); + if (it != filenames.end()) { if ((it->second).size() != 0) { flowFile = (it->second)[0]; } } + else { m->mothurOut("[ERROR]: sffinfo did not create a flow file, quitting.\n"); m->control_pressed = true; break; } + + inputString = "flow=" + flowFile; + if (oligos != "") { inputString += ", oligos=" + oligos; } + inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows); + inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs); + inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1"; + + m->mothurOutEndLine(); + m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine(); + m->mothurCalling = true; + + Command* trimFlowCommand = new TrimFlowsCommand(inputString); + trimFlowCommand->execute(); + + if (m->control_pressed){ break; } + + temp = trimFlowCommand->getOutputFiles(); + mergeOutputFileList(filenames, temp); + + delete trimFlowCommand; + m->mothurCalling = false; + + + string fileFileName = ""; + flowFile = ""; + if (oligos != "") { + it = temp.find("file"); + if (it != temp.end()) { if ((it->second).size() != 0) { fileFileName = (it->second)[0]; } } + else { m->mothurOut("[ERROR]: trim.flows did not create a file file, quitting.\n"); m->control_pressed = true; break; } + }else { + vector flowFiles; + it = temp.find("flow"); + if (it != temp.end()) { if ((it->second).size() != 0) { flowFiles = (it->second); } } + else { m->mothurOut("[ERROR]: trim.flows did not create a flow file, quitting.\n"); m->control_pressed = true; break; } + + for (int i = 0; i < flowFiles.size(); i++) { + string end = flowFiles[i].substr(flowFiles[i].length()-9); + if (end == "trim.flow") { + flowFile = flowFiles[i]; i+=flowFiles.size(); //if we found the trim.flow file stop looking + } + } + } + + if ((fileFileName == "") && (flowFile == "")) { m->mothurOut("[ERROR]: trim.flows did not create a file file or a trim.flow file, quitting.\n"); m->control_pressed = true; break; } + + if (fileFileName != "") { inputString = "file=" + fileFileName; } + else { inputString = "flow=" + flowFile; } + + inputString += ", lookup=" + lookupFileName + ", cutoff=" + toString(cutoff); + ", maxiters=" + toString(maxIters); + if (large) { inputString += ", large=" + toString(largeSize); } + inputString += ", sigma=" +toString(sigma); + inputString += ", mindelta=" + toString(minDelta); + inputString += ", order=" + flowOrder + ", processors=1"; + + //run shhh.flows + m->mothurOutEndLine(); + m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine(); + m->mothurCalling = true; + + Command* shhhFlowCommand = new ShhherCommand(inputString); + shhhFlowCommand->execute(); + + if (m->control_pressed){ break; } + + temp = shhhFlowCommand->getOutputFiles(); + mergeOutputFileList(filenames, temp); + + delete shhhFlowCommand; + m->mothurCalling = false; + + vector fastaFiles; + vector nameFiles; + it = temp.find("fasta"); + if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } } + else { m->mothurOut("[ERROR]: shhh.flows did not create a fasta file, quitting.\n"); m->control_pressed = true; break; } + + it = temp.find("name"); + if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } } + else { m->mothurOut("[ERROR]: shhh.flows did not create a name file, quitting.\n"); m->control_pressed = true; break; } + + //find fasta and name files with the shortest name. This is because if there is a composite name it will be the shortest. + fastaFile = fastaFiles[0]; + for (int i = 1; i < fastaFiles.size(); i++) { if (fastaFiles[i].length() < fastaFile.length()) { fastaFile = fastaFiles[i]; } } + string nameFile = nameFiles[0]; + for (int i = 1; i < nameFiles.size(); i++) { if (nameFiles[i].length() < nameFile.length()) { nameFile = nameFiles[i]; } } + + inputString = "fasta=" + fastaFile + ", name=" + nameFile; + if (oligos != "") { inputString += ", oligos=" + oligos; } + if (allFiles) { inputString += ", allfiles=t"; } + else { inputString += ", allfiles=f"; } + if (flip) { inputString += ", flip=t"; } + else { inputString += ", flip=f"; } + if (keepforward) { inputString += ", keepforward=t"; } + else { inputString += ", keepforward=f"; } + + + inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs); + inputString += ", tdiffs=" + toString(tdiffs) + ", maxambig=" + toString(maxAmbig) + ", minlength=" + toString(minLength) + ", maxlength=" + toString(maxLength); + if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); } + if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); } + inputString += ", processors=1"; + + //run trim.seqs + m->mothurOutEndLine(); + m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine(); + m->mothurCalling = true; + + Command* trimseqsCommand = new TrimSeqsCommand(inputString); + trimseqsCommand->execute(); + + if (m->control_pressed){ break; } + + temp = trimseqsCommand->getOutputFiles(); + mergeOutputFileList(filenames, temp); + + delete trimseqsCommand; + m->mothurCalling = false; + + it = temp.find("fasta"); + if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } } + else { m->mothurOut("[ERROR]: trim.seqs did not create a fasta file, quitting.\n"); m->control_pressed = true; break; } + + for (int i = 0; i < fastaFiles.size(); i++) { + string end = fastaFiles[i].substr(fastaFiles[i].length()-10); + if (end == "trim.fasta") { + fastaFile = fastaFiles[i]; i+=fastaFiles.size(); //if we found the trim.fasta file stop looking + } + } + + it = temp.find("name"); + if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } } + else { m->mothurOut("[ERROR]: trim.seqs did not create a name file, quitting.\n"); m->control_pressed = true; break; } + + for (int i = 0; i < nameFiles.size(); i++) { + string end = nameFiles[i].substr(nameFiles[i].length()-10); + if (end == "trim.names") { + nameFile = nameFiles[i]; i+=nameFiles.size(); //if we found the trim.names file stop looking + } + } + + vector groupFiles; + string groupFile = ""; + if (makeGroup) { + it = temp.find("group"); + if (it != temp.end()) { if ((it->second).size() != 0) { groupFiles = (it->second); } } + + //find group file with the shortest name. This is because if there is a composite group file it will be the shortest. + groupFile = groupFiles[0]; + for (int i = 1; i < groupFiles.size(); i++) { if (groupFiles[i].length() < groupFile.length()) { groupFile = groupFiles[i]; } } + } + + inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile; + m->mothurOutEndLine(); + m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); + m->mothurCalling = true; + + summarySeqsCommand = new SeqSummaryCommand(inputString); + summarySeqsCommand->execute(); + + if (m->control_pressed){ break; } + + temp = summarySeqsCommand->getOutputFiles(); + mergeOutputFileList(filenames, temp); + delete summarySeqsCommand; m->mothurCalling = false; + + m->mothurOutEndLine(); m->mothurOut("/******************************************/"); m->mothurOutEndLine(); + if (append) { + m->appendFiles(fastaFile, fasta); + m->appendFiles(nameFile, name); + if (makeGroup) { m->appendFiles(groupFile, group); } + } count++; + + for (it = filenames.begin(); it != filenames.end(); it++) { + for (int i = 0; i < (it->second).size(); i++) { + outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]); + } + } } return count; @@ -452,7 +719,29 @@ int SffMultipleCommand::driver(vector sffFiles, vector oligosFil } } //********************************************************************************************************************** -int SffMultipleCommand::createProcesses(vector sffFiles, vector oligosFiles){ +int SffMultipleCommand::mergeOutputFileList(map >& files, map >& temp){ + try { + map >::iterator it; + for (it = temp.begin(); it != temp.end(); it++) { + map >::iterator it2 = files.find(it->first); + if (it2 == files.end()) { //we do not already have this type so just add it + files[it->first] = it->second; + }else { //merge them + for (int i = 0; i < (it->second).size(); i++) { + files[it->first].push_back((it->second)[i]); + } + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SffMultipleCommand", "mergeOutputFileList"); + exit(1); + } +} +//********************************************************************************************************************** +int SffMultipleCommand::createProcesses(vector sffFiles, vector oligosFiles, string fasta, string name, string group){ try { vector processIDS; int process = 1; @@ -480,13 +769,14 @@ int SffMultipleCommand::createProcesses(vector sffFiles, vector processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end); + num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name + toString(getpid()) + ".temp", group + toString(getpid()) + ".temp"); //pass numSeqs to parent ofstream out; string tempFile = toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); - out << num << endl; + out << num << '\t' << outputNames.size() << endl; + for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; } out.close(); exit(0); @@ -498,7 +788,7 @@ int SffMultipleCommand::createProcesses(vector sffFiles, vector } //do my part - num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end); + num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end, fasta, name, group); //force parent to wait until all the processes are done for (int i=0;i sffFiles, vector wait(&temp); } -#else - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - //Windows version shared memory, so be careful when passing variables through the sffMultiplesData struct. - //Above fork() will clone, so memory is separate, but that's not the case with windows, - ////////////////////////////////////////////////////////////////////////////////////////////////////// - /* - vector pDataArray; - DWORD dwThreadIdArray[processors-1]; - HANDLE hThreadArray[processors-1]; - - //Create processor worker threads. - for( int i=0; ioutputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); } - CloseHandle(hThreadArray[i]); - delete pDataArray[i]; - } - */ -#endif - for (int i=0;iopenInputFile(tempFile, in); if (!in.eof()) { - int tempNum = 0; - in >> tempNum; + int tempNum = 0; int outputNamesSize = 0; + in >> tempNum >> outputNamesSize; m->gobble(in); + for (int j = 0; j < outputNamesSize; j++) { + string tempName; + in >> tempName; m->gobble(in); + outputNames.push_back(tempName); + } if (tempNum != numFilesToComplete[i+1]) { m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n"); } } in.close(); m->mothurRemove(tempFile); + + if (append) { + m->appendFiles(fasta+toString(processIDS[i])+".temp", fasta); m->mothurRemove(fasta+toString(processIDS[i])+".temp"); + m->appendFiles(name+toString(processIDS[i])+".temp", name); m->mothurRemove(name+toString(processIDS[i])+".temp"); + if (makeGroup) { m->appendFiles(group+toString(processIDS[i])+".temp", group); m->mothurRemove(group+toString(processIDS[i])+".temp"); } + } } - +#endif return 0; } diff --git a/sffmultiplecommand.h b/sffmultiplecommand.h index ecf33b7..4ab2c97 100644 --- a/sffmultiplecommand.h +++ b/sffmultiplecommand.h @@ -10,6 +10,11 @@ // #include "command.hpp" +#include "sffinfocommand.h" +#include "seqsummarycommand.h" +#include "trimflowscommand.h" +#include "shhhercommand.h" +#include "trimseqscommand.h" class SffMultipleCommand : public Command { @@ -37,21 +42,21 @@ private: linePair(int i, int j) : start(i), end(j) {} }; - string filename, outputDir, flowOrder; + string filename, outputDir, flowOrder, lookupFileName, minDelta; vector outputNames; - bool abort, trim, large, flip, qtrim, allFiles, keepforward; - int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers; - int numFlows, numFPrimers, numRPrimers, processors, maxIters, largeSize; - float signal, noise, cutoff, sigma, minDelta; - int qWindowSize, qWindowStep, keepFirst, removeLast, maxAmbig; - double qRollAverage, qThreshold, qWindowAverage, qAverage; + bool abort, trim, large, flip, allFiles, keepforward, append, makeGroup; + int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs; + int processors, maxIters, largeSize; + float signal, noise, cutoff, sigma; + int keepFirst, removeLast, maxAmbig; int readFile(vector& sffFiles, vector& oligosFiles); - int createProcesses(vector sffFiles, vector oligosFiles); - int driver(vector sffFiles, vector oligosFiles, int start, int end); + int createProcesses(vector sffFiles, vector oligosFiles, string, string, string); + int driver(vector sffFiles, vector oligosFiles, int start, int end, string, string, string); + int mergeOutputFileList(map >& files, map >& temp); + }; - #endif diff --git a/shhhercommand.cpp b/shhhercommand.cpp index c409639..fe0685b 100644 --- a/shhhercommand.cpp +++ b/shhhercommand.cpp @@ -776,8 +776,8 @@ int ShhherCommand::execute(){ if(compositeFASTAFileName != ""){ - outputNames.push_back(compositeFASTAFileName); - outputNames.push_back(compositeNamesFileName); + outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName); + outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName); } m->mothurOutEndLine(); @@ -1743,7 +1743,7 @@ void ShhherCommand::writeQualities(vector otuCounts){ } } qualityFile.close(); - outputNames.push_back(qualityFileName); + outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName); } catch(exception& e) { @@ -1788,7 +1788,7 @@ void ShhherCommand::writeSequences(vector otuCounts){ } fastaFile.close(); - outputNames.push_back(fastaFileName); + outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); if(compositeFASTAFileName != ""){ m->appendFiles(fastaFileName, compositeFASTAFileName); @@ -1825,7 +1825,7 @@ void ShhherCommand::writeNames(vector otuCounts){ } } nameFile.close(); - outputNames.push_back(nameFileName); + outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName); if(compositeNamesFileName != ""){ @@ -1857,7 +1857,7 @@ void ShhherCommand::writeGroups(){ groupFile << seqNameVector[i] << '\t' << fileGroup << endl; } groupFile.close(); - outputNames.push_back(groupFileName); + outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName); } catch(exception& e) { @@ -1917,7 +1917,7 @@ void ShhherCommand::writeClusters(vector otuCounts){ } } otuCountsFile.close(); - outputNames.push_back(otuCountsFileName); + outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName); } catch(exception& e) { @@ -1948,8 +1948,8 @@ int ShhherCommand::execute(){ #endif if(compositeFASTAFileName != ""){ - outputNames.push_back(compositeFASTAFileName); - outputNames.push_back(compositeNamesFileName); + outputNames.push_back(compositeFASTAFileName); outputTypes["fasta"].push_back(compositeFASTAFileName); + outputNames.push_back(compositeNamesFileName); outputTypes["name"].push_back(compositeNamesFileName); } m->mothurOutEndLine(); @@ -3266,7 +3266,7 @@ void ShhherCommand::writeQualities(int numOTUs, int numFlowCells, string quality } } qualityFile.close(); - outputNames.push_back(qualityFileName); + outputNames.push_back(qualityFileName); outputTypes["qfile"].push_back(qualityFileName); } catch(exception& e) { @@ -3310,7 +3310,7 @@ void ShhherCommand::writeSequences(string thisCompositeFASTAFileName, int numOTU } fastaFile.close(); - outputNames.push_back(fastaFileName); + outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName); if(thisCompositeFASTAFileName != ""){ m->appendFiles(fastaFileName, thisCompositeFASTAFileName); @@ -3345,7 +3345,7 @@ void ShhherCommand::writeNames(string thisCompositeNamesFileName, int numOTUs, s } } nameFile.close(); - outputNames.push_back(nameFileName); + outputNames.push_back(nameFileName); outputTypes["name"].push_back(nameFileName); if(thisCompositeNamesFileName != ""){ @@ -3370,7 +3370,7 @@ void ShhherCommand::writeGroups(string groupFileName, string fileRoot, int numSe groupFile << seqNameVector[i] << '\t' << fileRoot << endl; } groupFile.close(); - outputNames.push_back(groupFileName); + outputNames.push_back(groupFileName); outputTypes["group"].push_back(groupFileName); } catch(exception& e) { @@ -3429,7 +3429,7 @@ void ShhherCommand::writeClusters(string otuCountsFileName, int numOTUs, int num } } otuCountsFile.close(); - outputNames.push_back(otuCountsFileName); + outputNames.push_back(otuCountsFileName); outputTypes["counts"].push_back(otuCountsFileName); } catch(exception& e) { diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp index 6a3535f..9f603c4 100644 --- a/trimflowscommand.cpp +++ b/trimflowscommand.cpp @@ -28,7 +28,7 @@ vector TrimFlowsCommand::setParameters(){ CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal); CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise); CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles); - CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder); + CommandParameter porder("order", "String", "", "TACG", "", "", "",false,false); parameters.push_back(porder); CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -347,7 +347,7 @@ int TrimFlowsCommand::execute(){ output.close(); } - outputTypes["flow.files"].push_back(flowFilesFileName); + outputTypes["file"].push_back(flowFilesFileName); outputNames.push_back(flowFilesFileName); // set fasta file as new current fastafile