2 // sffmultiplecommand.cpp
5 // Created by Sarah Westcott on 8/14/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "sffmultiplecommand.h"
13 //**********************************************************************************************************************
// Registers every option accepted by sff.multiple in the `parameters` member:
// the input file list, followed by the sffinfo, trim.flows, shhh.flows and
// trim.seqs options this command forwards, and the common processors/inputdir/
// outputdir options. The parameter names are then collected into myArray.
//
// The fourth CommandParameter argument appears to be the default value (it
// matches the defaults applied by the option-parsing constructor for the other
// options). The "large" and "allfiles" entries were changed from "-1" and "t"
// to "0" and "F" so that the registered defaults agree with the defaults the
// constructor actually applies and with what the help text documents.
14 vector<string> SffMultipleCommand::setParameters(){
16 CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfile);
19 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
22 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
23 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
24 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows);
25 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
26 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
27 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
28 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
29 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
30 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
31 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
32 CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder);
35 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup);
36 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff);
37 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(pmaxiter);
// Default aligned with the constructor, which uses "0" (meaning "large" mode off) when the option is absent.
38 CommandParameter plarge("large", "Number", "", "0", "", "", "","",false,false); parameters.push_back(plarge);
39 CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
40 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
42 //trim.seqs parameters
// Default aligned with the constructor and the help text, both of which use F.
43 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
44 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
45 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
46 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
47 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
48 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
49 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
50 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
53 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
54 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
55 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// Collect just the names of the registered parameters.
57 vector<string> myArray;
58 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
62 m->errorOut(e, "SffMultipleCommand", "setParameters");
66 //**********************************************************************************************************************
// Builds the help text for sff.multiple: a one-line summary of the command,
// a comma-separated list of every parameter name obtained from setParameters(),
// a sentence describing individual parameters, and a usage example.
67 string SffMultipleCommand::getHelpString(){
69 string helpString = "";
70 helpString += "The sff.multiple command reads a file containing sff filenames and optional oligos filenames. It runs the files through sffinfo, trim.flows, shhh.flows and trim.seqs combining the results.\n";
71 helpString += "The sff.multiple command parameters are: ";
// Note: this local `parameters` (a list of names) shadows the member of the same name.
72 vector<string> parameters = setParameters();
// Every name except the last is followed by ", "; the last one is closed with ".\n" below.
73 for (int i = 0; i < parameters.size()-1; i++) {
74 helpString += parameters[i] + ", ";
76 helpString += parameters[parameters.size()-1] + ".\n";
77 helpString += "The file parameter allows you to enter the a file containing the list of sff files and optional oligos files.\n";
78 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores generated by sffinfo trimmed to the clipQualLeft and clipQualRight values. Default=True. \n";
79 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
80 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
81 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
82 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
83 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
84 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
85 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
86 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
87 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
88 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
89 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
93 helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
94 helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
98 m->errorOut(e, "SffMultipleCommand", "getHelpString");
102 //**********************************************************************************************************************
// Maps an output type ("fasta", "name" or "group") to its file-name pattern.
// Any other type is reported as an error and sets m->control_pressed.
103 string SffMultipleCommand::getOutputPattern(string type) {
107 if (type == "fasta") { pattern = "[filename],fasta"; }
108 else if (type == "name") { pattern = "[filename],names"; }
109 else if (type == "group") { pattern = "[filename],groups"; }
110 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
114 catch(exception& e) {
115 m->errorOut(e, "SffMultipleCommand", "getOutputPattern");
119 //**********************************************************************************************************************
// Default constructor: produces a non-runnable instance (abort and calledHelp are
// both set) and registers empty output lists for the fasta, name and group types.
120 SffMultipleCommand::SffMultipleCommand(){
122 abort = true; calledHelp = true;
124 vector<string> tempOutNames;
125 outputTypes["fasta"] = tempOutNames;
126 outputTypes["name"] = tempOutNames;
127 outputTypes["group"] = tempOutNames;
129 catch(exception& e) {
130 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
134 //**********************************************************************************************************************
// Option-parsing constructor.
//   option - the raw option string given to sff.multiple; the special values
//            "help" and "citation" print the corresponding text and mark the
//            command as aborted.
// For any other option string the constructor validates the parameter names
// against setParameters(), resolves the "file" and "lookup" paths against
// inputdir, reads every numeric/boolean option into its member (applying a
// default when the option is absent), and finally locates the lookup file,
// setting abort when it cannot be opened anywhere.
136 SffMultipleCommand::SffMultipleCommand(string option) {
138 abort = false; calledHelp = false; append=false; makeGroup=false;
140 //allow user to run help
141 if(option == "help") { help(); abort = true; calledHelp = true; }
142 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
145 //valid paramters for this command
146 vector<string> myArray = setParameters();
148 OptionParser parser(option);
// Note: this local map of user-supplied options shadows the `parameters` member.
149 map<string, string> parameters = parser.getParameters();
151 ValidParameters validParameter;
152 map<string,string>::iterator it;
154 //check to make sure all parameters are valid for command
155 for (it = parameters.begin(); it != parameters.end(); it++) {
156 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
159 //initialize outputTypes
160 vector<string> tempOutNames;
161 outputTypes["fasta"] = tempOutNames;
162 outputTypes["name"] = tempOutNames;
163 outputTypes["group"] = tempOutNames;
166 //if the user changes the output directory command factory will send this info to us in the output parameter
167 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
169 //if the user changes the input directory command factory will send this info to us in the output parameter
170 string inputDir = validParameter.validFile(parameters, "inputdir", false);
171 if (inputDir == "not found"){ inputDir = ""; }
// Prefix "file" and "lookup" with inputDir when the user supplied a bare file name.
174 it = parameters.find("file");
175 //user has given a template file
176 if(it != parameters.end()){
177 path = m->hasPath(it->second);
178 //if the user has not given a path then, add inputdir. else leave path alone.
179 if (path == "") { parameters["file"] = inputDir + it->second; }
182 it = parameters.find("lookup");
183 //user has given a template file
184 if(it != parameters.end()){
185 path = m->hasPath(it->second);
186 //if the user has not given a path then, add inputdir. else leave path alone.
187 if (path == "") { parameters["lookup"] = inputDir + it->second; }
// The list file: a file that was named but cannot be opened aborts the command.
191 filename = validParameter.validFile(parameters, "file", true);
192 if (filename == "not open") { filename = ""; abort = true; }
193 else if (filename == "not found") { filename = ""; }
// sffinfo / trim.flows options.
196 temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; }
197 trim = m->isTrue(temp);
199 temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
200 m->mothurConvert(temp, minFlows);
202 temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
203 m->mothurConvert(temp, maxFlows);
205 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found"){ temp = "9"; }
206 m->mothurConvert(temp, maxHomoP);
208 temp = validParameter.validFile(parameters, "signal", false); if (temp == "not found"){ temp = "0.50"; }
209 m->mothurConvert(temp, signal);
211 temp = validParameter.validFile(parameters, "noise", false); if (temp == "not found"){ temp = "0.70"; }
212 m->mothurConvert(temp, noise);
214 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
215 m->mothurConvert(temp, bdiffs);
217 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
218 m->mothurConvert(temp, pdiffs);
220 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
221 m->mothurConvert(temp, ldiffs);
223 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
224 m->mothurConvert(temp, sdiffs);
// tdiffs defaults to the sum of the four per-feature limits; an explicit 0 is also replaced by that sum.
226 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
227 m->mothurConvert(temp, tdiffs);
229 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
// When absent, processors falls back to the value already held by m; the chosen value is stored back on m as well.
232 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
233 m->setProcessors(temp);
234 m->mothurConvert(temp, processors);
// NOTE(review): only the warning is visible here for a flow order that is not four characters long — confirm the command is also aborted.
236 flowOrder = validParameter.validFile(parameters, "order", false);
237 if (flowOrder == "not found"){ flowOrder = "TACG"; }
238 else if(flowOrder.length() != 4){
239 m->mothurOut("The value of the order option must be four bases long\n");
// shhh.flows options.
242 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; }
243 m->mothurConvert(temp, cutoff);
245 temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){ temp = "0.000001"; }
248 temp = validParameter.validFile(parameters, "maxiter", false); if (temp == "not found"){ temp = "1000"; }
249 m->mothurConvert(temp, maxIters);
// Any non-zero value switches `large` on.
// NOTE(review): a negative value is reported below but, as far as the visible code shows, not rejected — confirm.
251 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found"){ temp = "0"; }
252 m->mothurConvert(temp, largeSize);
253 if (largeSize != 0) { large = true; }
254 else { large = false; }
255 if (largeSize < 0) { m->mothurOut("The value of the large cannot be negative.\n"); }
257 temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; }
258 m->mothurConvert(temp, sigma);
// trim.seqs options.
260 temp = validParameter.validFile(parameters, "flip", false);
261 if (temp == "not found") { flip = 0; }
262 else { flip = m->isTrue(temp); }
264 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
265 m->mothurConvert(temp, maxAmbig);
267 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
268 m->mothurConvert(temp, minLength);
270 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
271 m->mothurConvert(temp, maxLength);
273 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
274 convert(temp, keepFirst);
276 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
277 convert(temp, removeLast);
279 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
280 allFiles = m->isTrue(temp);
282 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
283 keepforward = m->isTrue(temp);
// Lookup file resolution. With no lookup option, the default name LookUp_Titanium.pat
// is tried as given, then under inputDir, then under mothur's default path, then next
// to the mothur executable; the command aborts if none of these opens.
285 temp = validParameter.validFile(parameters, "lookup", true);
286 if (temp == "not found") {
287 lookupFileName = "LookUp_Titanium.pat";
291 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
294 //if you can't open it, try input location
295 if (ableToOpen == 1) {
296 if (inputDir != "") { //default path is set
297 string tryPath = inputDir + lookupFileName;
298 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
300 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
302 lookupFileName = tryPath;
306 //if you can't open it, try default location
307 if (ableToOpen == 1) {
308 if (m->getDefaultPath() != "") { //default path is set
309 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
310 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
312 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
314 lookupFileName = tryPath;
318 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
319 if (ableToOpen == 1) {
// The executable's directory is taken as everything before the last 'm' (case-insensitive) in m->argv.
320 string exepath = m->argv;
321 string tempPath = exepath;
322 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
323 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
325 string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
326 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
328 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
330 lookupFileName = tryPath;
333 if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
// A lookup file was named but could not be opened: retry next to the mothur executable before giving up.
335 else if(temp == "not open") {
337 lookupFileName = validParameter.validFile(parameters, "lookup", false);
339 //if you can't open it its not inputDir, try mothur excutable location
340 string exepath = m->argv;
341 string tempPath = exepath;
342 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
343 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
345 string tryPath = m->getFullPathName(exepath) + lookupFileName;
346 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
348 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
350 lookupFileName = tryPath;
352 if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
353 }else { lookupFileName = temp; }
356 catch(exception& e) {
357 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
361 //**********************************************************************************************************************
// Runs the whole sff.multiple pipeline.
// Reads the list of sff/oligos files, derives the combined fasta/name/group
// output names from the list file's name, processes the files either in this
// process (driver) or split across several processes (createProcesses), then
// registers the combined outputs and prints every output file name.
// Returns 0 after help/citation or an interrupted run, and 2 when the command
// was aborted by invalid options.
//
// Two guards were added so that createProcesses, which divides the number of
// files by `processors`, is never reached with a processor count of zero:
// an empty file list is now reported as an error, and any processor count of
// one or less takes the single-process path.
362 int SffMultipleCommand::execute(){
364 if (abort == true) { if (calledHelp) { return 0; } return 2; }
366 vector<string> sffFiles, oligosFiles;
367 readFile(sffFiles, oligosFiles);
369 outputDir = m->hasPath(filename);
370 string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
371 map<string, string> variables;
372 variables["[filename]"] = fileroot;
373 string fasta = fileroot + getOutputFileName("fasta",variables);
374 string name = fileroot + getOutputFileName("name",variables);
375 string group = fileroot + getOutputFileName("group",variables);
377 if (m->control_pressed) { return 0; }
// Nothing to process: without this check the clamp below would set processors to 0.
if (sffFiles.size() == 0) { m->mothurOut("[ERROR]: no sff files were found in " + filename + ", quitting.\n"); m->control_pressed = true; return 0; }
// Never use more processes than there are files.
379 if (sffFiles.size() < processors) { processors = sffFiles.size(); }
381 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
383 //trim.flows, shhh.flows cannot handle multiple processors for windows.
384 processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n");
// "<= 1" rather than "== 1": a zero or negative processor count must not reach createProcesses.
386 if (processors <= 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); }
387 else { createProcesses(sffFiles, oligosFiles, fasta, name, group); }
// On interruption, delete everything produced so far.
389 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// Register the combined files and make them the current fasta/name(/group) files.
392 outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta);
393 m->setFastaFile(fasta);
394 outputNames.push_back(name); outputTypes["name"].push_back(name);
395 m->setNameFile(name);
396 if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
399 //report output filenames
400 m->mothurOutEndLine();
401 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
402 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
403 m->mothurOutEndLine();
407 catch(exception& e) {
408 m->errorOut(e, "SffMultipleCommand", "execute");
412 //**********************************************************************************************************************
// Parses the list file named by `filename`.
//   sffFiles    - receives the full path of every sff file listed.
//   oligosFiles - receives, at the same index, the full path of the matching
//                 oligos file, or an empty string when the line names none.
// Lines whose first token starts with '#' are skipped. After reading, `append`
// is set when either no line or every line had an oligos file, and `makeGroup`
// is set when every line had one.
413 int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligosFiles){
417 m->openInputFile(filename, in);
418 bool allBlank = true;
424 if (m->control_pressed) { break; }
428 sff = m->getFullPathName(sff);
430 //ignore file pairing
// Comment line: consume characters up to the end of the line (LF = 10 or CR = 13).
431 if(sff[0] == '#'){ while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } m->gobble(in); }
432 else { //check for oligos file
435 // get rest of line in case there is a oligos filename
// Characters are read one at a time; spaces (32) and tabs (9) are dropped, everything else up to the line end becomes the oligos name.
438 if (c == 10 || c == 13){ break; }
439 else if (c == 32 || c == 9){;} //space or tab
440 else { oligos += c; }
442 sffFiles.push_back(sff);
443 if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false; }
444 if (oligos == "") { allFull = false; }
445 oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
451 if (allBlank || allFull) { append = true; }
452 if (allFull) { makeGroup = true; }
456 catch(exception& e) {
457 m->errorOut(e, "SffMultipleCommand", "readFile");
461 //**********************************************************************************************************************
462 //runs sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs, summary.seqs for each sff file.
// Parameters:
//   sffFiles, oligosFiles - parallel lists produced by readFile (an empty oligos
//                           entry means the sff file has no oligos file).
//   start, end            - half-open index range [start, end) of the files this
//                           call is responsible for.
//   fasta, name, group    - combined output files; they are removed first and
//                           each file's trimmed results are appended to them.
// Every sub-command is run with processors=1. The output files reported by each
// sub-command are accumulated in `filenames` and copied into outputNames /
// outputTypes at the end of each file's iteration. Any missing intermediate
// output sets m->control_pressed and stops the loop.
//
// Fixes in this version:
//   * The shhh.flows option string was terminated by a stray ';' before the
//     max-iteration option, so that option was silently discarded. It is now
//     appended, using the key "maxiter" (the name this command itself uses for
//     the option) rather than "maxiters".
//   * The suffix checks ("trim.flow", "trim.fasta", "trim.names") no longer call
//     substr with an out-of-range position when a file name is shorter than the
//     suffix, which would have thrown std::out_of_range.
463 int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFiles, int start, int end, string fasta, string name, string group){
465 m->mothurRemove(fasta); m->mothurRemove(name); m->mothurRemove(group);
467 for (int s = start; s < end; s++) {
469 string sff = sffFiles[s];
470 string oligos = oligosFiles[s];
472 m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n");
// Step 1: sffinfo, always requesting flow output.
475 string inputString = "sff=" + sff + ", flow=T";
476 if (trim) { inputString += ", trim=T"; }
477 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
478 m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine();
479 m->mothurCalling = true;
481 Command* sffCommand = new SffInfoCommand(inputString);
482 sffCommand->execute();
484 if (m->control_pressed){ break; }
486 map<string, vector<string> > filenames = sffCommand->getOutputFiles();
489 m->mothurCalling = false;
490 m->mothurOutEndLine();
492 //run summary.seqs on the fasta file
493 string fastaFile = "";
494 map<string, vector<string> >::iterator it = filenames.find("fasta");
495 if (it != filenames.end()) { if ((it->second).size() != 0) { fastaFile = (it->second)[0]; } }
496 else { m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
498 inputString = "fasta=" + fastaFile + ", processors=1";
499 m->mothurOutEndLine();
500 m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine();
501 m->mothurCalling = true;
503 Command* summarySeqsCommand = new SeqSummaryCommand(inputString);
504 summarySeqsCommand->execute();
506 if (m->control_pressed){ break; }
508 map<string, vector<string> > temp = summarySeqsCommand->getOutputFiles();
509 mergeOutputFileList(filenames, temp);
511 delete summarySeqsCommand;
512 m->mothurCalling = false;
514 m->mothurOutEndLine();
516 //run trim.flows on the fasta file
517 string flowFile = "";
518 it = filenames.find("flow");
519 if (it != filenames.end()) { if ((it->second).size() != 0) { flowFile = (it->second)[0]; } }
520 else { m->mothurOut("[ERROR]: sffinfo did not create a flow file, quitting.\n"); m->control_pressed = true; break; }
522 inputString = "flow=" + flowFile;
523 if (oligos != "") { inputString += ", oligos=" + oligos; }
524 inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows);
525 inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
526 inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1";
528 m->mothurOutEndLine();
529 m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine();
530 m->mothurCalling = true;
532 Command* trimFlowCommand = new TrimFlowsCommand(inputString);
533 trimFlowCommand->execute();
535 if (m->control_pressed){ break; }
537 temp = trimFlowCommand->getOutputFiles();
538 mergeOutputFileList(filenames, temp);
540 delete trimFlowCommand;
541 m->mothurCalling = false;
// Step 4 input: prefer the "file" output of trim.flows, otherwise its *trim.flow file.
544 string fileFileName = "";
547 it = temp.find("file");
548 if (it != temp.end()) { if ((it->second).size() != 0) { fileFileName = (it->second)[0]; } }
549 else { m->mothurOut("[ERROR]: trim.flows did not create a file file, quitting.\n"); m->control_pressed = true; break; }
551 vector<string> flowFiles;
552 it = temp.find("flow");
553 if (it != temp.end()) { if ((it->second).size() != 0) { flowFiles = (it->second); } }
554 else { m->mothurOut("[ERROR]: trim.flows did not create a flow file, quitting.\n"); m->control_pressed = true; break; }
556 for (int i = 0; i < flowFiles.size(); i++) {
// Names shorter than the suffix cannot match; leaving `end` empty avoids an out-of-range substr.
557 string end = ""; if (flowFiles[i].length() >= 9) { end = flowFiles[i].substr(flowFiles[i].length()-9); }
558 if (end == "trim.flow") {
559 flowFile = flowFiles[i]; i+=flowFiles.size(); //if we found the trim.flow file stop looking
564 if ((fileFileName == "") && (flowFile == "")) { m->mothurOut("[ERROR]: trim.flows did not create a file file or a trim.flow file, quitting.\n"); m->control_pressed = true; break; }
566 if (fileFileName != "") { inputString = "file=" + fileFileName; }
567 else { inputString = "flow=" + flowFile; }
// The max-iteration option is now really appended (a stray ';' used to cut the statement short).
569 inputString += ", lookup=" + lookupFileName + ", cutoff=" + toString(cutoff) + ", maxiter=" + toString(maxIters);
570 if (large) { inputString += ", large=" + toString(largeSize); }
571 inputString += ", sigma=" +toString(sigma);
572 inputString += ", mindelta=" + toString(minDelta);
573 inputString += ", order=" + flowOrder + ", processors=1";
576 m->mothurOutEndLine();
577 m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine();
578 m->mothurCalling = true;
580 Command* shhhFlowCommand = new ShhherCommand(inputString);
581 shhhFlowCommand->execute();
583 if (m->control_pressed){ break; }
585 temp = shhhFlowCommand->getOutputFiles();
586 mergeOutputFileList(filenames, temp);
588 delete shhhFlowCommand;
589 m->mothurCalling = false;
591 vector<string> fastaFiles;
592 vector<string> nameFiles;
593 it = temp.find("fasta");
594 if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } }
595 else { m->mothurOut("[ERROR]: shhh.flows did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
597 it = temp.find("name");
598 if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } }
599 else { m->mothurOut("[ERROR]: shhh.flows did not create a name file, quitting.\n"); m->control_pressed = true; break; }
601 //find fasta and name files with the shortest name. This is because if there is a composite name it will be the shortest.
602 fastaFile = fastaFiles[0];
603 for (int i = 1; i < fastaFiles.size(); i++) { if (fastaFiles[i].length() < fastaFile.length()) { fastaFile = fastaFiles[i]; } }
604 string nameFile = nameFiles[0];
605 for (int i = 1; i < nameFiles.size(); i++) { if (nameFiles[i].length() < nameFile.length()) { nameFile = nameFiles[i]; } }
// Step 5: trim.seqs on the denoised fasta/name pair.
607 inputString = "fasta=" + fastaFile + ", name=" + nameFile;
608 if (oligos != "") { inputString += ", oligos=" + oligos; }
609 if (allFiles) { inputString += ", allfiles=t"; }
610 else { inputString += ", allfiles=f"; }
611 if (flip) { inputString += ", flip=t"; }
612 else { inputString += ", flip=f"; }
613 if (keepforward) { inputString += ", keepforward=t"; }
614 else { inputString += ", keepforward=f"; }
617 inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
618 inputString += ", tdiffs=" + toString(tdiffs) + ", maxambig=" + toString(maxAmbig) + ", minlength=" + toString(minLength) + ", maxlength=" + toString(maxLength);
619 if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); }
620 if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); }
621 inputString += ", processors=1";
624 m->mothurOutEndLine();
625 m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine();
626 m->mothurCalling = true;
628 Command* trimseqsCommand = new TrimSeqsCommand(inputString);
629 trimseqsCommand->execute();
631 if (m->control_pressed){ break; }
633 temp = trimseqsCommand->getOutputFiles();
634 mergeOutputFileList(filenames, temp);
636 delete trimseqsCommand;
637 m->mothurCalling = false;
639 it = temp.find("fasta");
640 if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } }
641 else { m->mothurOut("[ERROR]: trim.seqs did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
643 for (int i = 0; i < fastaFiles.size(); i++) {
// Same length guard as for the trim.flow lookup.
644 string end = ""; if (fastaFiles[i].length() >= 10) { end = fastaFiles[i].substr(fastaFiles[i].length()-10); }
645 if (end == "trim.fasta") {
646 fastaFile = fastaFiles[i]; i+=fastaFiles.size(); //if we found the trim.fasta file stop looking
650 it = temp.find("name");
651 if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } }
652 else { m->mothurOut("[ERROR]: trim.seqs did not create a name file, quitting.\n"); m->control_pressed = true; break; }
654 for (int i = 0; i < nameFiles.size(); i++) {
655 string end = ""; if (nameFiles[i].length() >= 10) { end = nameFiles[i].substr(nameFiles[i].length()-10); }
656 if (end == "trim.names") {
657 nameFile = nameFiles[i]; i+=nameFiles.size(); //if we found the trim.names file stop looking
661 vector<string> groupFiles;
662 string groupFile = "";
664 it = temp.find("group");
665 if (it != temp.end()) { if ((it->second).size() != 0) { groupFiles = (it->second); } }
667 //find group file with the shortest name. This is because if there is a composite group file it will be the shortest.
668 groupFile = groupFiles[0];
669 for (int i = 1; i < groupFiles.size(); i++) { if (groupFiles[i].length() < groupFile.length()) { groupFile = groupFiles[i]; } }
// Step 6: summary.seqs on the trimmed result.
672 inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile;
673 m->mothurOutEndLine();
674 m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine();
675 m->mothurCalling = true;
677 summarySeqsCommand = new SeqSummaryCommand(inputString);
678 summarySeqsCommand->execute();
680 if (m->control_pressed){ break; }
682 temp = summarySeqsCommand->getOutputFiles();
683 mergeOutputFileList(filenames, temp);
685 delete summarySeqsCommand;
686 m->mothurCalling = false;
688 m->mothurOutEndLine();
689 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
// Append this file's trimmed results to the combined outputs.
692 m->appendFiles(fastaFile, fasta);
693 m->appendFiles(nameFile, name);
694 if (makeGroup) { m->appendFiles(groupFile, group); }
// Record every intermediate file produced for this sff file.
698 for (it = filenames.begin(); it != filenames.end(); it++) {
699 for (int i = 0; i < (it->second).size(); i++) {
700 outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]);
707 catch(exception& e) {
708 m->errorOut(e, "SffMultipleCommand", "driver");
712 //**********************************************************************************************************************
// Merges the per-type output file lists in `temp` into `files`.
//   files - accumulated map from output type to file names (modified in place).
//   temp  - output map of the sub-command that just ran.
// A type not yet present in `files` is copied over as a whole; for a type that
// is already present, the names from `temp` are appended to the existing list.
713 int SffMultipleCommand::mergeOutputFileList(map<string, vector<string> >& files, map<string, vector<string> >& temp){
715 map<string, vector<string> >::iterator it;
716 for (it = temp.begin(); it != temp.end(); it++) {
717 map<string, vector<string> >::iterator it2 = files.find(it->first);
718 if (it2 == files.end()) { //we do not already have this type so just add it
719 files[it->first] = it->second;
721 for (int i = 0; i < (it->second).size(); i++) {
722 files[it->first].push_back((it->second)[i]);
729 catch(exception& e) {
730 m->errorOut(e, "SffMultipleCommand", "mergeOutputFileList");
734 //**********************************************************************************************************************
// Splits the sff files across `processors` worker processes and merges their results.
//   sffFiles, oligosFiles - parallel lists produced by readFile.
//   fasta, name, group    - combined output files.
// The files are divided into contiguous index ranges of equal size, with the
// last range taking any remainder. Each spawned process runs driver() on its
// range, writing to "<output><pid>.temp" files, and reports the number it
// completed plus its outputNames in "<pid>.num.temp". This process handles the
// first range itself, then reads every report back, warns when a worker
// completed a different number of files than expected, appends the workers'
// temp outputs to the combined files and removes the temp files.
//
// Fix in this version: the catch handler reported the error under the class
// name "ShhherCommand"; it now reports "SffMultipleCommand".
735 int SffMultipleCommand::createProcesses(vector<string> sffFiles, vector<string> oligosFiles, string fasta, string name, string group){
737 vector<int> processIDS;
741 //divide the groups between the processors
742 vector<linePair> lines;
743 vector<int> numFilesToComplete;
744 int numFilesPerProcessor = sffFiles.size() / processors;
745 for (int i = 0; i < processors; i++) {
746 int startIndex = i * numFilesPerProcessor;
747 int endIndex = (i+1) * numFilesPerProcessor;
// The last range absorbs the remainder of the integer division.
748 if(i == (processors - 1)){ endIndex = sffFiles.size(); }
749 lines.push_back(linePair(startIndex, endIndex));
750 numFilesToComplete.push_back((endIndex-startIndex));
753 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
755 //loop through and create all the processes you want
756 while (process != processors) {
760 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
// Worker side: process this range into pid-suffixed temp files.
763 num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name + toString(getpid()) + ".temp", group + toString(getpid()) + ".temp");
765 //pass numSeqs to parent
767 string tempFile = toString(getpid()) + ".num.temp";
768 m->openOutputFile(tempFile, out);
769 out << num << '\t' << outputNames.size() << endl;
770 for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; }
// Spawning failed: interrupt the workers that were already started.
775 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
776 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
// This process handles the first range itself.
782 num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end, fasta, name, group);
784 //force parent to wait until all the processes are done
785 for (int i=0;i<processIDS.size();i++) {
786 int temp = processIDS[i];
// Collect each worker's report; processIDS[i] handled range i+1, hence numFilesToComplete[i+1].
790 for (int i=0;i<processIDS.size();i++) {
792 string tempFile = toString(processIDS[i]) + ".num.temp";
793 m->openInputFile(tempFile, in);
795 int tempNum = 0; int outputNamesSize = 0;
796 in >> tempNum >> outputNamesSize; m->gobble(in);
797 for (int j = 0; j < outputNamesSize; j++) {
799 in >> tempName; m->gobble(in);
800 outputNames.push_back(tempName);
802 if (tempNum != numFilesToComplete[i+1]) {
803 m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n");
806 in.close(); m->mothurRemove(tempFile);
// Fold the worker's temp outputs into the combined files, then delete them.
809 m->appendFiles(fasta+toString(processIDS[i])+".temp", fasta); m->mothurRemove(fasta+toString(processIDS[i])+".temp");
810 m->appendFiles(name+toString(processIDS[i])+".temp", name); m->mothurRemove(name+toString(processIDS[i])+".temp");
811 if (makeGroup) { m->appendFiles(group+toString(processIDS[i])+".temp", group); m->mothurRemove(group+toString(processIDS[i])+".temp"); }
818 catch(exception& e) {
819 m->errorOut(e, "SffMultipleCommand", "createProcesses");
823 //**********************************************************************************************************************