2 // sffmultiplecommand.cpp
5 // Created by Sarah Westcott on 8/14/12.
6 // Copyright (c) 2012 Schloss Lab. All rights reserved.
9 #include "sffmultiplecommand.h"
13 //**********************************************************************************************************************
14 vector<string> SffMultipleCommand::setParameters(){
16 CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfile);
19 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
22 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
23 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows);
24 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows);
25 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27 CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
28 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
29 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
30 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
31 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
32 CommandParameter porder("order", "String", "", "TACG", "", "", "",false,false); parameters.push_back(porder);
35 CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
36 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
37 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
38 CommandParameter plarge("large", "Number", "", "-1", "", "", "",false,false); parameters.push_back(plarge);
39 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
40 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
42 //trim.seqs parameters
43 CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles);
44 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
45 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
46 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
47 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
48 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
49 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
50 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
53 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
54 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
55 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
57 vector<string> myArray;
58 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
62 m->errorOut(e, "SffMultipleCommand", "setParameters");
66 //**********************************************************************************************************************
67 string SffMultipleCommand::getHelpString(){
69 string helpString = "";
70 helpString += "The sff.multiple command reads a file containing sff filenames and optional oligos filenames. It runs the files through sffinfo, trim.flows, shhh.flows and trim.seqs combining the results.\n";
71 helpString += "The sff.multiple command parameters are: ";
72 vector<string> parameters = setParameters();
73 for (int i = 0; i < parameters.size()-1; i++) {
74 helpString += parameters[i] + ", ";
76 helpString += parameters[parameters.size()-1] + ".\n";
77 helpString += "The file parameter allows you to enter the a file containing the list of sff files and optional oligos files.\n";
78 helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores generated by sffinfo trimmed to the clipQualLeft and clipQualRight values. Default=True. \n";
79 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
80 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
81 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
82 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
83 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
84 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
85 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
86 helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
87 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
88 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
89 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
90 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
93 helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
94 helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
98 m->errorOut(e, "SffMultipleCommand", "getHelpString");
102 //**********************************************************************************************************************
// Maps an output type name to the tag used when naming that output file.
//   type      - output type to look up ("fasta", "name" or "group" have tags here)
//   inputName - not read on any of the lines visible in this block
// Returns "fasta", "names" or "groups" for the three known types, otherwise
// the empty string it was initialised with.
103 string SffMultipleCommand::getOutputFileNameTag(string type, string inputName=""){
105 string outputFileName = "";
106 map<string, vector<string> >::iterator it;
108 //is this a type this command creates
109 it = outputTypes.find(type);
// Unregistered type: report it to the user.
110 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// Translate the type into its tag; a type without a tag is reported and
// also raises m->control_pressed.
112 if (type == "fasta") { outputFileName = "fasta"; }
113 else if (type == "name") { outputFileName = "names"; }
114 else if (type == "group") { outputFileName = "groups"; }
115 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
117 return outputFileName;
119 catch(exception& e) {
// Report the exception together with the class and method it came from.
120 m->errorOut(e, "SffMultipleCommand", "getOutputFileNameTag");
126 //**********************************************************************************************************************
// Default constructor (no option string): sets both abort and calledHelp to
// true and registers each output type with an empty list of file names.
127 SffMultipleCommand::SffMultipleCommand(){
129 abort = true; calledHelp = true;
// A single empty vector is copied into every output-type slot.
131 vector<string> tempOutNames;
132 outputTypes["fasta"] = tempOutNames;
133 outputTypes["name"] = tempOutNames;
134 outputTypes["group"] = tempOutNames;
135 outputTypes["flow"] = tempOutNames;
136 outputTypes["qfile"] = tempOutNames;
138 catch(exception& e) {
// Report the exception together with the class and method it came from.
139 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
143 //**********************************************************************************************************************
// Option-parsing constructor.
//   option - "help", "citation", or the option string for the command.
// For a real option string it: validates every supplied parameter name,
// registers the output types, resolves the file/lookup paths against inputdir,
// reads each option into its member (substituting a default when the option is
// absent), and finally locates the lookup file. `abort` is set when a
// parameter is invalid or a required file cannot be opened.
145 SffMultipleCommand::SffMultipleCommand(string option) {
147 abort = false; calledHelp = false; append=false; makeGroup=false;
149 //allow user to run help
150 if(option == "help") { help(); abort = true; calledHelp = true; }
151 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
154 //valid paramters for this command
155 vector<string> myArray = setParameters();
// Note: this local map hides the `parameters` member for the rest of the constructor.
157 OptionParser parser(option);
158 map<string, string> parameters = parser.getParameters();
160 ValidParameters validParameter;
161 map<string,string>::iterator it;
163 //check to make sure all parameters are valid for command
164 for (it = parameters.begin(); it != parameters.end(); it++) {
165 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
168 //initialize outputTypes
169 vector<string> tempOutNames;
170 outputTypes["fasta"] = tempOutNames;
171 outputTypes["flow"] = tempOutNames;
172 outputTypes["qfile"] = tempOutNames;
173 outputTypes["name"] = tempOutNames;
174 outputTypes["group"] = tempOutNames;
177 //if the user changes the output directory command factory will send this info to us in the output parameter
178 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
180 //if the user changes the input directory command factory will send this info to us in the output parameter
181 string inputDir = validParameter.validFile(parameters, "inputdir", false);
182 if (inputDir == "not found"){ inputDir = ""; }
// Prefix inputDir onto the "file" and "lookup" values when they carry no path of their own.
185 it = parameters.find("file");
186 //user has given a template file
187 if(it != parameters.end()){
188 path = m->hasPath(it->second);
189 //if the user has not given a path then, add inputdir. else leave path alone.
190 if (path == "") { parameters["file"] = inputDir + it->second; }
193 it = parameters.find("lookup");
194 //user has given a template file
195 if(it != parameters.end()){
196 path = m->hasPath(it->second);
197 //if the user has not given a path then, add inputdir. else leave path alone.
198 if (path == "") { parameters["lookup"] = inputDir + it->second; }
// The pairing file: "not open" aborts, "not found" only leaves filename empty.
202 filename = validParameter.validFile(parameters, "file", true);
203 if (filename == "not open") { filename = ""; abort = true; }
204 else if (filename == "not found") { filename = ""; }
// From here on each option follows the same pattern: read the raw text,
// substitute the default when it is "not found", then convert into the member.
207 temp = validParameter.validFile(parameters, "trim", false); if (temp == "not found"){ temp = "T"; }
208 trim = m->isTrue(temp);
210 temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
211 m->mothurConvert(temp, minFlows);
213 temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
214 m->mothurConvert(temp, maxFlows);
216 temp = validParameter.validFile(parameters, "maxhomop", false); if (temp == "not found"){ temp = "9"; }
217 m->mothurConvert(temp, maxHomoP);
219 temp = validParameter.validFile(parameters, "signal", false); if (temp == "not found"){ temp = "0.50"; }
220 m->mothurConvert(temp, signal);
222 temp = validParameter.validFile(parameters, "noise", false); if (temp == "not found"){ temp = "0.70"; }
223 m->mothurConvert(temp, noise);
225 temp = validParameter.validFile(parameters, "bdiffs", false); if (temp == "not found"){ temp = "0"; }
226 m->mothurConvert(temp, bdiffs);
228 temp = validParameter.validFile(parameters, "pdiffs", false); if (temp == "not found"){ temp = "0"; }
229 m->mothurConvert(temp, pdiffs);
231 temp = validParameter.validFile(parameters, "ldiffs", false); if (temp == "not found") { temp = "0"; }
232 m->mothurConvert(temp, ldiffs);
234 temp = validParameter.validFile(parameters, "sdiffs", false); if (temp == "not found") { temp = "0"; }
235 m->mothurConvert(temp, sdiffs);
// tdiffs defaults to the sum of the four per-feature limits; an explicit 0
// is replaced by that same sum just below.
237 temp = validParameter.validFile(parameters, "tdiffs", false); if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs; temp = toString(tempTotal); }
238 m->mothurConvert(temp, tdiffs);
240 if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; }
// processors falls back to mothur's current setting, and the chosen value is written back to it.
243 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
244 m->setProcessors(temp);
245 m->mothurConvert(temp, processors);
// NOTE(review): a flow order that is not four characters long only prints a
// message on the lines visible here; abort is not set - confirm that is intended.
247 flowOrder = validParameter.validFile(parameters, "order", false);
248 if (flowOrder == "not found"){ flowOrder = "TACG"; }
249 else if(flowOrder.length() != 4){
250 m->mothurOut("The value of the order option must be four bases long\n");
253 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; }
254 m->mothurConvert(temp, cutoff);
// NOTE(review): on the lines visible here the mindelta text is never converted
// into a member before temp is reused for maxiter, yet driver() forwards
// minDelta to shhh.flows - confirm minDelta is actually assigned.
256 temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){ temp = "0.000001"; }
259 temp = validParameter.validFile(parameters, "maxiter", false); if (temp == "not found"){ temp = "1000"; }
260 m->mothurConvert(temp, maxIters);
// large: any non-zero size switches large mode on.
// NOTE(review): a negative size only prints a message; large stays true and
// abort is not set on the visible lines - confirm.
262 temp = validParameter.validFile(parameters, "large", false); if (temp == "not found"){ temp = "0"; }
263 m->mothurConvert(temp, largeSize);
264 if (largeSize != 0) { large = true; }
265 else { large = false; }
266 if (largeSize < 0) { m->mothurOut("The value of the large cannot be negative.\n"); }
268 temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found") { temp = "60"; }
269 m->mothurConvert(temp, sigma);
271 temp = validParameter.validFile(parameters, "flip", false);
272 if (temp == "not found") { flip = 0; }
273 else { flip = m->isTrue(temp); }
275 temp = validParameter.validFile(parameters, "maxambig", false); if (temp == "not found") { temp = "-1"; }
276 m->mothurConvert(temp, maxAmbig);
278 temp = validParameter.validFile(parameters, "minlength", false); if (temp == "not found") { temp = "0"; }
279 m->mothurConvert(temp, minLength);
281 temp = validParameter.validFile(parameters, "maxlength", false); if (temp == "not found") { temp = "0"; }
282 m->mothurConvert(temp, maxLength);
// NOTE(review): keepfirst and removelast go through convert() while every
// other numeric option uses m->mothurConvert() - confirm this is deliberate.
284 temp = validParameter.validFile(parameters, "keepfirst", false); if (temp == "not found") { temp = "0"; }
285 convert(temp, keepFirst);
287 temp = validParameter.validFile(parameters, "removelast", false); if (temp == "not found") { temp = "0"; }
288 convert(temp, removeLast);
290 temp = validParameter.validFile(parameters, "allfiles", false); if (temp == "not found") { temp = "F"; }
291 allFiles = m->isTrue(temp);
293 temp = validParameter.validFile(parameters, "keepforward", false); if (temp == "not found") { temp = "F"; }
294 keepforward = m->isTrue(temp);
// Lookup file. With no lookup option the name "LookUp_Titanium.pat" is tried
// as given, then under inputDir, then under mothur's default path, then next
// to the executable; abort is set if every attempt fails.
296 temp = validParameter.validFile(parameters, "lookup", true);
297 if (temp == "not found") {
298 lookupFileName = "LookUp_Titanium.pat";
302 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
305 //if you can't open it, try input location
306 if (ableToOpen == 1) {
307 if (inputDir != "") { //default path is set
308 string tryPath = inputDir + lookupFileName;
309 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
311 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
313 lookupFileName = tryPath;
317 //if you can't open it, try default location
318 if (ableToOpen == 1) {
319 if (m->getDefaultPath() != "") { //default path is set
320 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
321 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
323 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
325 lookupFileName = tryPath;
329 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
330 if (ableToOpen == 1) {
// exepath is cut at the last 'm' found in a lower-cased copy of the path -
// presumably the start of the "mothur" binary name; TODO confirm.
331 string exepath = m->argv;
332 string tempPath = exepath;
333 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
334 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
336 string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
337 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
339 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
341 lookupFileName = tryPath;
344 if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
// A lookup name was supplied but could not be opened: retry next to the executable.
346 else if(temp == "not open") {
348 lookupFileName = validParameter.validFile(parameters, "lookup", false);
350 //if you can't open it its not inputDir, try mothur excutable location
351 string exepath = m->argv;
352 string tempPath = exepath;
353 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
354 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
356 string tryPath = m->getFullPathName(exepath) + lookupFileName;
357 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
359 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
361 lookupFileName = tryPath;
363 if (ableToOpen == 1) { m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true; }
// The supplied lookup file opened: use it as given.
364 }else { lookupFileName = temp; }
367 catch(exception& e) {
// Report the exception together with the class and method it came from.
368 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
372 //**********************************************************************************************************************
// Runs the command: reads the sff/oligos pairing file, derives the combined
// output names from the pairing file's own location and root name, processes
// the sff files (in-process or through createProcesses), then registers and
// lists the output files.
// On the visible paths it returns 0 after help or an interrupt, and 2 when the
// command was aborted for any other reason.
373 int SffMultipleCommand::execute(){
375 if (abort == true) { if (calledHelp) { return 0; } return 2; }
377 vector<string> sffFiles, oligosFiles;
378 readFile(sffFiles, oligosFiles);
// NOTE(review): this replaces whatever outputDir the constructor stored from
// the outputdir option with the pairing file's directory - confirm intended.
380 outputDir = m->hasPath(filename);
381 string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
382 string fasta = fileroot + getOutputFileNameTag("fasta");
383 string name = fileroot + getOutputFileNameTag("name");
384 string group = fileroot + getOutputFileNameTag("group");
386 if (m->control_pressed) { return 0; }
// Never use more processes than there are files to process.
388 if (sffFiles.size() < processors) { processors = sffFiles.size(); }
// NOTE(review): the #else/#endif lines of this conditional are not visible
// here; the single-processor override below is presumably the non-Unix
// branch - confirm.
390 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
392 //trim.flows, shhh.flows cannot handle multiple processors for windows.
393 processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n");
395 if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); }
396 else { createProcesses(sffFiles, oligosFiles, fasta, name, group); }
// On interrupt, delete every file recorded in outputNames before returning.
398 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// Register the combined outputs and make them mothur's current fasta/name
// (and group, only when makeGroup is set) files.
401 outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta);
402 m->setFastaFile(fasta);
403 outputNames.push_back(name); outputTypes["name"].push_back(name);
404 m->setNameFile(name);
405 if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
408 //report output filenames
409 m->mothurOutEndLine();
410 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
411 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
412 m->mothurOutEndLine();
416 catch(exception& e) {
// Report the exception together with the class and method it came from.
417 m->errorOut(e, "SffMultipleCommand", "execute");
421 //**********************************************************************************************************************
// Reads the pairing file named by the `filename` member.
//   sffFiles    - receives the full path of each sff file listed
//   oligosFiles - receives, at the same index, the full path of that file's
//                 oligos file, or an empty string when none was given
// Also sets the append and makeGroup members from whether oligos files were
// supplied for all entries, for none, or for only some.
422 int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligosFiles){
426 m->openInputFile(filename, in);
// allBlank stays true only while no entry has supplied an oligos file.
427 bool allBlank = true;
433 if (m->control_pressed) { break; }
437 sff = m->getFullPathName(sff);
439 //ignore file pairing
// An entry starting with '#' is skipped: consume up to the end of the line (LF 10 or CR 13).
440 if(sff[0] == '#'){ while (!in.eof()) { char c = in.get(); if (c == 10 || c == 13){ break; } } m->gobble(in); }
441 else { //check for oligos file
444 // get rest of line in case there is a oligos filename
// Character scan of the rest of the line: LF/CR ends it, spaces and tabs are
// dropped, anything else is appended to the oligos name.
447 if (c == 10 || c == 13){ break; }
448 else if (c == 32 || c == 9){;} //space or tab
449 else { oligos += c; }
451 sffFiles.push_back(sff);
452 if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false; }
453 if (oligos == "") { allFull = false; }
454 oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
// append is set when the entries are uniform (all with, or all without, an
// oligos file); makeGroup only when every entry has one.
460 if (allBlank || allFull) { append = true; }
461 if (allFull) { makeGroup = true; }
465 catch(exception& e) {
// Report the exception together with the class and method it came from.
466 m->errorOut(e, "SffMultipleCommand", "readFile");
470 //**********************************************************************************************************************
471 //runs sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs, summary.seqs for each sff file.
// Processes sffFiles[start, end). For each file it builds an option string and
// executes, in order: sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs
// and summary.seqs, feeding each step the files produced by the previous one.
// The final fasta/name (and group, when makeGroup is set) files are appended
// to the combined outputs and every file produced along the way is recorded in
// outputNames/outputTypes.
//   sffFiles, oligosFiles - parallel lists; an empty oligos entry means none
//   start, end            - half-open index range to process
//   fasta, name, group    - combined output files to append to
472 int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFiles, int start, int end, string fasta, string name, string group){
// Start from clean combined output files.
474 m->mothurRemove(fasta); m->mothurRemove(name); m->mothurRemove(group);
476 for (int s = start; s < end; s++) {
478 string sff = sffFiles[s];
479 string oligos = oligosFiles[s];
481 m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n");
// Step 1: sffinfo. m->mothurCalling is switched on around every nested command.
484 string inputString = "sff=" + sff + ", flow=T";
485 if (trim) { inputString += ", trim=T"; }
486 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
487 m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine();
488 m->mothurCalling = true;
490 Command* sffCommand = new SffInfoCommand(inputString);
491 sffCommand->execute();
// NOTE(review): this and the later interrupt checks break out while the
// command object just executed is still allocated on the visible lines.
493 if (m->control_pressed){ break; }
495 map<string, vector<string> > filenames = sffCommand->getOutputFiles();
498 m->mothurCalling = false;
499 m->mothurOutEndLine();
501 //run summary.seqs on the fasta file
// The else pairs with the outer if: the error fires only when the "fasta" key
// is absent; a present-but-empty list leaves fastaFile empty.
502 string fastaFile = "";
503 map<string, vector<string> >::iterator it = filenames.find("fasta");
504 if (it != filenames.end()) { if ((it->second).size() != 0) { fastaFile = (it->second)[0]; } }
505 else { m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
507 inputString = "fasta=" + fastaFile + ", processors=1";
508 m->mothurOutEndLine();
509 m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine();
510 m->mothurCalling = true;
512 Command* summarySeqsCommand = new SeqSummaryCommand(inputString);
513 summarySeqsCommand->execute();
515 if (m->control_pressed){ break; }
517 map<string, vector<string> > temp = summarySeqsCommand->getOutputFiles();
518 mergeOutputFileList(filenames, temp);
520 delete summarySeqsCommand;
521 m->mothurCalling = false;
523 m->mothurOutEndLine();
525 //run trim.flows on the fasta file
526 string flowFile = "";
527 it = filenames.find("flow");
528 if (it != filenames.end()) { if ((it->second).size() != 0) { flowFile = (it->second)[0]; } }
529 else { m->mothurOut("[ERROR]: sffinfo did not create a flow file, quitting.\n"); m->control_pressed = true; break; }
// Step 3: trim.flows, with the oligos file only when this sff file has one.
531 inputString = "flow=" + flowFile;
532 if (oligos != "") { inputString += ", oligos=" + oligos; }
533 inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows);
534 inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
535 inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1";
537 m->mothurOutEndLine();
538 m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine();
539 m->mothurCalling = true;
541 Command* trimFlowCommand = new TrimFlowsCommand(inputString);
542 trimFlowCommand->execute();
544 if (m->control_pressed){ break; }
546 temp = trimFlowCommand->getOutputFiles();
547 mergeOutputFileList(filenames, temp);
549 delete trimFlowCommand;
550 m->mothurCalling = false;
// Pick the input for shhh.flows from trim.flows' outputs: a "file" output if
// there is one, otherwise the flow file whose name ends in "trim.flow".
553 string fileFileName = "";
556 it = temp.find("file");
557 if (it != temp.end()) { if ((it->second).size() != 0) { fileFileName = (it->second)[0]; } }
558 else { m->mothurOut("[ERROR]: trim.flows did not create a file file, quitting.\n"); m->control_pressed = true; break; }
560 vector<string> flowFiles;
561 it = temp.find("flow");
562 if (it != temp.end()) { if ((it->second).size() != 0) { flowFiles = (it->second); } }
563 else { m->mothurOut("[ERROR]: trim.flows did not create a flow file, quitting.\n"); m->control_pressed = true; break; }
// NOTE(review): length()-9 is unsigned arithmetic; a name shorter than nine
// characters would wrap and substr() would throw std::out_of_range.
565 for (int i = 0; i < flowFiles.size(); i++) {
566 string end = flowFiles[i].substr(flowFiles[i].length()-9);
567 if (end == "trim.flow") {
568 flowFile = flowFiles[i]; i+=flowFiles.size(); //if we found the trim.flow file stop looking
573 if ((fileFileName == "") && (flowFile == "")) { m->mothurOut("[ERROR]: trim.flows did not create a file file or a trim.flow file, quitting.\n"); m->control_pressed = true; break; }
575 if (fileFileName != "") { inputString = "file=" + fileFileName; }
576 else { inputString = "flow=" + flowFile; }
// NOTE(review): the ';' after toString(cutoff) ends the statement, so the
// trailing `+ ", maxiters=" + toString(maxIters)` is evaluated and thrown away:
// maxIters never reaches shhh.flows. The option is also spelled "maxiters"
// here but registered as "maxiter" in setParameters - confirm and fix both.
578 inputString += ", lookup=" + lookupFileName + ", cutoff=" + toString(cutoff); + ", maxiters=" + toString(maxIters);
579 if (large) { inputString += ", large=" + toString(largeSize); }
580 inputString += ", sigma=" +toString(sigma);
581 inputString += ", mindelta=" + toString(minDelta);
582 inputString += ", order=" + flowOrder + ", processors=1";
585 m->mothurOutEndLine();
586 m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine();
587 m->mothurCalling = true;
589 Command* shhhFlowCommand = new ShhherCommand(inputString);
590 shhhFlowCommand->execute();
592 if (m->control_pressed){ break; }
594 temp = shhhFlowCommand->getOutputFiles();
595 mergeOutputFileList(filenames, temp);
597 delete shhhFlowCommand;
598 m->mothurCalling = false;
600 vector<string> fastaFiles;
601 vector<string> nameFiles;
602 it = temp.find("fasta");
603 if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } }
604 else { m->mothurOut("[ERROR]: shhh.flows did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
606 it = temp.find("name");
607 if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } }
608 else { m->mothurOut("[ERROR]: shhh.flows did not create a name file, quitting.\n"); m->control_pressed = true; break; }
610 //find fasta and name files with the shortest name. This is because if there is a composite name it will be the shortest.
// NOTE(review): element [0] is read without a size check; a key that exists
// with an empty list would leave fastaFiles/nameFiles empty here.
611 fastaFile = fastaFiles[0];
612 for (int i = 1; i < fastaFiles.size(); i++) { if (fastaFiles[i].length() < fastaFile.length()) { fastaFile = fastaFiles[i]; } }
613 string nameFile = nameFiles[0];
614 for (int i = 1; i < nameFiles.size(); i++) { if (nameFiles[i].length() < nameFile.length()) { nameFile = nameFiles[i]; } }
// Step 5: trim.seqs on the denoised fasta/name pair. Boolean options are
// always passed explicitly; keepfirst/removelast only when non-zero.
616 inputString = "fasta=" + fastaFile + ", name=" + nameFile;
617 if (oligos != "") { inputString += ", oligos=" + oligos; }
618 if (allFiles) { inputString += ", allfiles=t"; }
619 else { inputString += ", allfiles=f"; }
620 if (flip) { inputString += ", flip=t"; }
621 else { inputString += ", flip=f"; }
622 if (keepforward) { inputString += ", keepforward=t"; }
623 else { inputString += ", keepforward=f"; }
626 inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
627 inputString += ", tdiffs=" + toString(tdiffs) + ", maxambig=" + toString(maxAmbig) + ", minlength=" + toString(minLength) + ", maxlength=" + toString(maxLength);
628 if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); }
629 if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); }
630 inputString += ", processors=1";
633 m->mothurOutEndLine();
634 m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine();
635 m->mothurCalling = true;
637 Command* trimseqsCommand = new TrimSeqsCommand(inputString);
638 trimseqsCommand->execute();
640 if (m->control_pressed){ break; }
642 temp = trimseqsCommand->getOutputFiles();
643 mergeOutputFileList(filenames, temp);
645 delete trimseqsCommand;
646 m->mothurCalling = false;
// From trim.seqs' outputs keep the files ending in "trim.fasta" and
// "trim.names". NOTE(review): same length()-10 wrap-around risk as above for
// names shorter than ten characters.
648 it = temp.find("fasta");
649 if (it != temp.end()) { if ((it->second).size() != 0) { fastaFiles = (it->second); } }
650 else { m->mothurOut("[ERROR]: trim.seqs did not create a fasta file, quitting.\n"); m->control_pressed = true; break; }
652 for (int i = 0; i < fastaFiles.size(); i++) {
653 string end = fastaFiles[i].substr(fastaFiles[i].length()-10);
654 if (end == "trim.fasta") {
655 fastaFile = fastaFiles[i]; i+=fastaFiles.size(); //if we found the trim.fasta file stop looking
659 it = temp.find("name");
660 if (it != temp.end()) { if ((it->second).size() != 0) { nameFiles = (it->second); } }
661 else { m->mothurOut("[ERROR]: trim.seqs did not create a name file, quitting.\n"); m->control_pressed = true; break; }
663 for (int i = 0; i < nameFiles.size(); i++) {
664 string end = nameFiles[i].substr(nameFiles[i].length()-10);
665 if (end == "trim.names") {
666 nameFile = nameFiles[i]; i+=nameFiles.size(); //if we found the trim.names file stop looking
670 vector<string> groupFiles;
671 string groupFile = "";
// NOTE(review): unlike the fasta/name lookups there is no error branch here,
// so groupFiles may still be empty when [0] is read below unless a guard that
// is not visible on these lines protects it - confirm.
673 it = temp.find("group");
674 if (it != temp.end()) { if ((it->second).size() != 0) { groupFiles = (it->second); } }
676 //find group file with the shortest name. This is because if there is a composite group file it will be the shortest.
677 groupFile = groupFiles[0];
678 for (int i = 1; i < groupFiles.size(); i++) { if (groupFiles[i].length() < groupFile.length()) { groupFile = groupFiles[i]; } }
// Step 6: summary.seqs on the trimmed result (the earlier pointer variable is reused).
681 inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile;
682 m->mothurOutEndLine();
683 m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine();
684 m->mothurCalling = true;
686 summarySeqsCommand = new SeqSummaryCommand(inputString);
687 summarySeqsCommand->execute();
689 if (m->control_pressed){ break; }
691 temp = summarySeqsCommand->getOutputFiles();
692 mergeOutputFileList(filenames, temp);
694 delete summarySeqsCommand;
695 m->mothurCalling = false;
697 m->mothurOutEndLine();
698 m->mothurOut("/******************************************/"); m->mothurOutEndLine();
// Add this file's final results to the combined outputs.
701 m->appendFiles(fastaFile, fasta);
702 m->appendFiles(nameFile, name);
703 if (makeGroup) { m->appendFiles(groupFile, group); }
// Record every file produced for this sff file under its output type.
707 for (it = filenames.begin(); it != filenames.end(); it++) {
708 for (int i = 0; i < (it->second).size(); i++) {
709 outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]);
716 catch(exception& e) {
// Report the exception together with the class and method it came from.
717 m->errorOut(e, "SffMultipleCommand", "driver");
721 //**********************************************************************************************************************
722 int SffMultipleCommand::mergeOutputFileList(map<string, vector<string> >& files, map<string, vector<string> >& temp){
724 map<string, vector<string> >::iterator it;
725 for (it = temp.begin(); it != temp.end(); it++) {
726 map<string, vector<string> >::iterator it2 = files.find(it->first);
727 if (it2 == files.end()) { //we do not already have this type so just add it
728 files[it->first] = it->second;
730 for (int i = 0; i < (it->second).size(); i++) {
731 files[it->first].push_back((it->second)[i]);
738 catch(exception& e) {
739 m->errorOut(e, "SffMultipleCommand", "mergeOutputFileList");
743 //**********************************************************************************************************************
// Multi-process front end for driver(). Splits sffFiles into `processors`
// contiguous slices, runs driver() on each, and afterwards merges what the
// workers produced into the combined fasta/name/group files.
//   sffFiles, oligosFiles - parallel lists, passed through to driver()
//   fasta, name, group    - combined output files
// The call that spawns the workers is not on the lines visible here; the
// getpid()-based temp names and the "unable to spawn" message indicate
// separate processes on Unix-like builds.
744 int SffMultipleCommand::createProcesses(vector<string> sffFiles, vector<string> oligosFiles, string fasta, string name, string group){
746 vector<int> processIDS;
750 //divide the groups between the processors
// Equal-sized slices by integer division; the last slice also takes the remainder.
// numFilesToComplete remembers how many files each slice holds.
751 vector<linePair> lines;
752 vector<int> numFilesToComplete;
753 int numFilesPerProcessor = sffFiles.size() / processors;
754 for (int i = 0; i < processors; i++) {
755 int startIndex = i * numFilesPerProcessor;
756 int endIndex = (i+1) * numFilesPerProcessor;
757 if(i == (processors - 1)){ endIndex = sffFiles.size(); }
758 lines.push_back(linePair(startIndex, endIndex));
759 numFilesToComplete.push_back((endIndex-startIndex));
762 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
764 //loop through and create all the processes you want
765 while (process != processors) {
769 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
// Worker side: process this slice into temp files named after the worker's own pid.
772 num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name + toString(getpid()) + ".temp", group + toString(getpid()) + ".temp");
774 //pass numSeqs to parent
// <pid>.num.temp holds driver()'s return value, the number of output names,
// then one output name per line.
776 string tempFile = toString(getpid()) + ".num.temp";
777 m->openOutputFile(tempFile, out);
778 out << num << '\t' << outputNames.size() << endl;
779 for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; }
// Spawn failure: report it and signal every worker started so far.
784 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
785 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
// The calling process handles slice 0 itself, writing straight to the combined files.
791 num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end, fasta, name, group);
793 //force parent to wait until all the processes are done
794 for (int i=0;i<processIDS.size();i++) {
795 int temp = processIDS[i];
// Collect each worker's report and output, in the order the workers were started.
799 for (int i=0;i<processIDS.size();i++) {
801 string tempFile = toString(processIDS[i]) + ".num.temp";
802 m->openInputFile(tempFile, in);
804 int tempNum = 0; int outputNamesSize = 0;
805 in >> tempNum >> outputNamesSize; m->gobble(in);
806 for (int j = 0; j < outputNamesSize; j++) {
808 in >> tempName; m->gobble(in);
809 outputNames.push_back(tempName);
// Worker i is checked against slot i+1 because slot 0 is the slice the
// calling process ran itself (see the driver call on lines[0] above).
811 if (tempNum != numFilesToComplete[i+1]) {
812 m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches. The flow files may be too large to process with multiple processors. \n");
815 in.close(); m->mothurRemove(tempFile);
// Append the worker's temp outputs to the combined files, then delete them.
818 m->appendFiles(fasta+toString(processIDS[i])+".temp", fasta); m->mothurRemove(fasta+toString(processIDS[i])+".temp");
819 m->appendFiles(name+toString(processIDS[i])+".temp", name); m->mothurRemove(name+toString(processIDS[i])+".temp");
820 if (makeGroup) { m->appendFiles(group+toString(processIDS[i])+".temp", group); m->mothurRemove(group+toString(processIDS[i])+".temp"); }
827 catch(exception& e) {
// NOTE(review): the error is reported under "ShhherCommand" although this is
// SffMultipleCommand::createProcesses - looks like a copy/paste slip; confirm
// and correct the class name.
828 m->errorOut(e, "ShhherCommand", "createProcesses");
832 //**********************************************************************************************************************