]> git.donarmstrong.com Git - mothur.git/blob - sffmultiplecommand.cpp
added primer.design command. fixed bug with linux unifrac subsampling, metastats...
[mothur.git] / sffmultiplecommand.cpp
1 //
2 //  sffmultiplecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 8/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "sffmultiplecommand.h"
10
11
12
13 //**********************************************************************************************************************
14 vector<string> SffMultipleCommand::setParameters(){     
15         try {           
16                 CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none","fasta-name",false,true,true); parameters.push_back(pfile);
17         
18         //sffinfo
19                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptrim);
20         
21         //trim.flows
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
24                 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
27         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
28                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
29         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
30                 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
31                 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
32                 CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder);
33
34         //shhh.flows
35         CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup);
36                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff);
37                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(pmaxiter);
38         CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge);
39                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
40                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
41         
42         //trim.seqs parameters
43         CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "","",false,false); parameters.push_back(pallfiles);
44         CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
45                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
46                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
47                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
48                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
49         CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
50                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
51
52         
53         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
54                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
55                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
56                 
57                 vector<string> myArray;
58                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SffMultipleCommand", "setParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SffMultipleCommand::getHelpString(){     
68         try {
69                 string helpString = "";
70                 helpString += "The sff.multiple command reads a file containing sff filenames and optional oligos filenames. It runs the files through sffinfo, trim.flows, shhh.flows and trim.seqs combining the results.\n";
71                 helpString += "The sff.multiple command parameters are: ";
72         vector<string> parameters = setParameters();
73         for (int i = 0; i < parameters.size()-1; i++) {
74             helpString += parameters[i] + ", ";
75         }
76         helpString += parameters[parameters.size()-1] + ".\n";
77                 helpString += "The file parameter allows you to enter the a file containing the list of sff files and optional oligos files.\n";
78         helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores generated by sffinfo trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
79         helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
80                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
81                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
82                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
83                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
84                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
85                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
86         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
87                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
88                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
89                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
90                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92
93                 helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
95                 return helpString;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "SffMultipleCommand", "getHelpString");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 string SffMultipleCommand::getOutputPattern(string type) {
104     try {
105         string pattern = "";
106         
107         if (type == "fasta") {  pattern = "[filename],fasta"; } 
108         else if (type == "name") {  pattern = "[filename],names"; } 
109         else if (type == "group") {  pattern = "[filename],groups"; }
110         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
111         
112         return pattern;
113     }
114     catch(exception& e) {
115         m->errorOut(e, "SffMultipleCommand", "getOutputPattern");
116         exit(1);
117     }
118 }
119 //**********************************************************************************************************************
120 SffMultipleCommand::SffMultipleCommand(){       
121         try {
122                 abort = true; calledHelp = true; 
123                 setParameters();
124                 vector<string> tempOutNames;
125                 outputTypes["fasta"] = tempOutNames;
126         outputTypes["name"] = tempOutNames;
127         outputTypes["group"] = tempOutNames;
128         }
129         catch(exception& e) {
130                 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
131                 exit(1);
132         }
133 }
134 //**********************************************************************************************************************
135
136 SffMultipleCommand::SffMultipleCommand(string option)  {
137         try {
138                 abort = false; calledHelp = false;  append=false; makeGroup=false;
139                 
140                 //allow user to run help
141                 if(option == "help") { help(); abort = true; calledHelp = true; }
142                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
143                 
144                 else {
145                         //valid paramters for this command
146                         vector<string> myArray = setParameters();
147                         
148                         OptionParser parser(option);
149                         map<string, string> parameters = parser.getParameters();
150                         
151                         ValidParameters validParameter;
152             map<string,string>::iterator it;
153             
154                         //check to make sure all parameters are valid for command
155                         for (it = parameters.begin(); it != parameters.end(); it++) { 
156                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
157                         }
158                         
159                         //initialize outputTypes
160                         vector<string> tempOutNames;
161                         outputTypes["fasta"] = tempOutNames;
162             outputTypes["name"] = tempOutNames;
163             outputTypes["group"] = tempOutNames;
164
165                         
166                         //if the user changes the output directory command factory will send this info to us in the output parameter 
167                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
168                         
169                         //if the user changes the input directory command factory will send this info to us in the output parameter 
170                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
171                         if (inputDir == "not found"){   inputDir = "";          }
172                         else {
173                                 string path;
174                 it = parameters.find("file");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
180                                 }
181                 
182                 it = parameters.find("lookup");
183                                 //user has given a template file
184                                 if(it != parameters.end()){ 
185                                         path = m->hasPath(it->second);
186                                         //if the user has not given a path then, add inputdir. else leave path alone.
187                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
188                                 }
189                         }
190             
191                         filename = validParameter.validFile(parameters, "file", true);
192             if (filename == "not open") { filename = ""; abort = true; }
193             else if (filename == "not found") { filename = "";  }
194                         
195                         string temp;
196                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
197                         trim = m->isTrue(temp); 
198             
199             temp = validParameter.validFile(parameters, "minflows", false);     if (temp == "not found") { temp = "450"; }
200                         m->mothurConvert(temp, minFlows);  
201             
202                         temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
203                         m->mothurConvert(temp, maxFlows);  
204             
205             temp = validParameter.validFile(parameters, "maxhomop", false);             if (temp == "not found"){       temp = "9";             }
206                         m->mothurConvert(temp, maxHomoP);  
207             
208                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
209                         m->mothurConvert(temp, signal);  
210             
211                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
212                         m->mothurConvert(temp, noise);  
213             
214                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
215                         m->mothurConvert(temp, bdiffs);
216                         
217                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
218                         m->mothurConvert(temp, pdiffs);
219                         
220             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
221                         m->mothurConvert(temp, ldiffs);
222             
223             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
224                         m->mothurConvert(temp, sdiffs);
225                         
226                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
227                         m->mothurConvert(temp, tdiffs);
228                         
229                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
230             
231                         
232                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
233                         m->setProcessors(temp);
234                         m->mothurConvert(temp, processors);
235             
236                         flowOrder = validParameter.validFile(parameters, "order", false);
237                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
238                         else if(flowOrder.length() != 4){
239                                 m->mothurOut("The value of the order option must be four bases long\n");
240                         }
241             
242             temp = validParameter.validFile(parameters, "cutoff", false);       if (temp == "not found"){       temp = "0.01";          }
243                         m->mothurConvert(temp, cutoff); 
244                         
245                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
246                         minDelta = temp; 
247             
248                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
249                         m->mothurConvert(temp, maxIters); 
250             
251             temp = validParameter.validFile(parameters, "large", false);        if (temp == "not found"){       temp = "0";             }
252                         m->mothurConvert(temp, largeSize); 
253             if (largeSize != 0) { large = true; }
254             else { large = false;  }
255             if (largeSize < 0) {  m->mothurOut("The value of the large cannot be negative.\n"); }
256             
257                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
258                         m->mothurConvert(temp, sigma); 
259             
260             temp = validParameter.validFile(parameters, "flip", false);
261                         if (temp == "not found")    {   flip = 0;       }
262                         else {  flip = m->isTrue(temp);         }
263                         
264                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
265                         m->mothurConvert(temp, maxAmbig);  
266                        
267                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
268                         m->mothurConvert(temp, minLength); 
269                         
270                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
271                         m->mothurConvert(temp, maxLength);
272                                                 
273                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
274                         convert(temp, keepFirst);
275             
276                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
277                         convert(temp, removeLast);
278                         
279                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
280                         allFiles = m->isTrue(temp);
281             
282             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
283                         keepforward = m->isTrue(temp);
284             
285             temp = validParameter.validFile(parameters, "lookup", true);
286                         if (temp == "not found")        {       
287                                 lookupFileName = "LookUp_Titanium.pat"; 
288                                 
289                                 int ableToOpen;
290                                 ifstream in;
291                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
292                                 in.close();     
293                                 
294                                 //if you can't open it, try input location
295                                 if (ableToOpen == 1) {
296                                         if (inputDir != "") { //default path is set
297                                                 string tryPath = inputDir + lookupFileName;
298                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
299                                                 ifstream in2;
300                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
301                                                 in2.close();
302                                                 lookupFileName = tryPath;
303                                         }
304                                 }
305                                 
306                                 //if you can't open it, try default location
307                                 if (ableToOpen == 1) {
308                                         if (m->getDefaultPath() != "") { //default path is set
309                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
310                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
311                                                 ifstream in2;
312                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
313                                                 in2.close();
314                                                 lookupFileName = tryPath;
315                                         }
316                                 }
317                                 
318                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
319                                 if (ableToOpen == 1) {
320                                         string exepath = m->argv;
321                                         string tempPath = exepath;
322                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
323                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
324                                         
325                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
326                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
327                                         ifstream in2;
328                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
329                                         in2.close();
330                                         lookupFileName = tryPath;
331                                 }
332                                 
333                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
334                         }
335                         else if(temp == "not open")     {       
336                                 
337                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
338                                 
339                                 //if you can't open it its not inputDir, try mothur excutable location
340                                 string exepath = m->argv;
341                                 string tempPath = exepath;
342                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
343                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
344                 
345                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
346                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
347                                 ifstream in2;
348                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
349                                 in2.close();
350                                 lookupFileName = tryPath;
351                                 
352                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
353                         }else                                           {       lookupFileName = temp;  }
354                 }
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
358                 exit(1);
359         }
360 }
361 //**********************************************************************************************************************
362 int SffMultipleCommand::execute(){
363         try {
364                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
365                 
366                 vector<string> sffFiles, oligosFiles;
367         readFile(sffFiles, oligosFiles);
368         
369         outputDir = m->hasPath(filename);
370         string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
371         map<string, string> variables; 
372                 variables["[filename]"] = fileroot;
373         string fasta = fileroot + getOutputFileName("fasta",variables);
374         string name = fileroot + getOutputFileName("name",variables);
375         string group = fileroot + getOutputFileName("group",variables);
376         
377         if (m->control_pressed) { return 0; }
378         
379         if (sffFiles.size() < processors) { processors = sffFiles.size(); }
380         
381 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
382 #else
383         //trim.flows, shhh.flows cannot handle multiple processors for windows.
384         processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n");
385 #endif
386         if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); }
387         else { createProcesses(sffFiles, oligosFiles, fasta, name, group); } 
388                 
389                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
390                 
391         if (append) { 
392             outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta);
393             m->setFastaFile(fasta);
394             outputNames.push_back(name); outputTypes["name"].push_back(name);
395             m->setNameFile(name);
396             if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
397         }
398         
399                 //report output filenames
400                 m->mothurOutEndLine();
401                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
402                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
403                 m->mothurOutEndLine();
404         
405                 return 0;
406         }
407         catch(exception& e) {
408                 m->errorOut(e, "SffMultipleCommand", "execute");
409                 exit(1);
410         }
411 }
412 //**********************************************************************************************************************
413 int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligosFiles){
414         try {
415         
416         ifstream in;
417         m->openInputFile(filename, in);
418         bool allBlank = true;
419         bool allFull = true;
420         
421         string oligos, sff;
422         while (!in.eof()) {
423             
424             if (m->control_pressed) { break; }
425             
426             in >> sff;
427             
428             sff = m->getFullPathName(sff);
429             
430             //ignore file pairing
431             if(sff[0] == '#'){ while (!in.eof())        {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } m->gobble(in); }
432             else { //check for oligos file
433                 oligos = "";
434             
435                 // get rest of line in case there is a oligos filename
436                 while (!in.eof())       {       
437                     char c = in.get(); 
438                     if (c == 10 || c == 13){    break;  }
439                     else if (c == 32 || c == 9){;} //space or tab
440                     else {      oligos += c;  }
441                 } 
442                 sffFiles.push_back(sff);
443                 if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false;  }
444                 if (oligos == "") { allFull = false;  }
445                 oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
446             }
447             m->gobble(in);
448         }
449         in.close();
450         
451         if (allBlank || allFull) { append = true; }
452         if (allFull) { makeGroup = true; }
453         
454         return 0;
455     }
456         catch(exception& e) {
457                 m->errorOut(e, "SffMultipleCommand", "readFile");
458                 exit(1);
459         }
460 }
461 //**********************************************************************************************************************
462 //runs sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs, summary.seqs for each sff file.
463 int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFiles, int start, int end, string fasta, string name, string group){
464     try {
465         m->mothurRemove(fasta); m->mothurRemove(name); m->mothurRemove(group);
466         int count = 0;
467         for (int s = start; s < end; s++) {
468             
469             string sff = sffFiles[s];
470             string oligos = oligosFiles[s];
471             
472             m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n");
473             
474             //run sff.info
475             string inputString = "sff=" + sff + ", flow=T";
476             if (trim) { inputString += ", trim=T"; }
477             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
478             m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine(); 
479             m->mothurCalling = true;
480             
481             Command* sffCommand = new SffInfoCommand(inputString);
482             sffCommand->execute();
483             
484             if (m->control_pressed){ break; }
485             
486             map<string, vector<string> > filenames = sffCommand->getOutputFiles();
487             
488             delete sffCommand;
489             m->mothurCalling = false;
490             m->mothurOutEndLine(); 
491             
492             //run summary.seqs on the fasta file
493             string fastaFile = "";
494             map<string, vector<string> >::iterator it = filenames.find("fasta");
495             if (it != filenames.end()) {  if ((it->second).size() != 0) { fastaFile = (it->second)[0];  } }
496             else {  m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
497             
498             inputString = "fasta=" + fastaFile + ", processors=1";
499             m->mothurOutEndLine(); 
500             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
501             m->mothurCalling = true;
502             
503             Command* summarySeqsCommand = new SeqSummaryCommand(inputString);
504             summarySeqsCommand->execute();
505             
506             if (m->control_pressed){ break; }
507             
508             map<string, vector<string> > temp = summarySeqsCommand->getOutputFiles();
509             mergeOutputFileList(filenames, temp);
510             
511             delete summarySeqsCommand;
512             m->mothurCalling = false;
513             
514             m->mothurOutEndLine(); 
515             
516             //run trim.flows on the fasta file
517             string flowFile = "";
518             it = filenames.find("flow");
519             if (it != filenames.end()) {  if ((it->second).size() != 0) { flowFile = (it->second)[0];  } }
520             else {  m->mothurOut("[ERROR]: sffinfo did not create a flow file, quitting.\n"); m->control_pressed = true; break;  }
521             
522             inputString = "flow=" + flowFile;
523             if (oligos != "") { inputString += ", oligos=" + oligos; }
524             inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows);
525             inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
526             inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1";
527             
528             m->mothurOutEndLine(); 
529             m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine(); 
530             m->mothurCalling = true;
531             
532             Command* trimFlowCommand = new TrimFlowsCommand(inputString);
533             trimFlowCommand->execute();
534             
535             if (m->control_pressed){ break; }
536             
537             temp = trimFlowCommand->getOutputFiles();
538             mergeOutputFileList(filenames, temp);
539             
540             delete trimFlowCommand;
541             m->mothurCalling = false;
542             
543             
544             string fileFileName = "";
545             flowFile = "";
546             if (oligos != "") { 
547                 it = temp.find("file");
548                 if (it != temp.end()) {  if ((it->second).size() != 0) { fileFileName = (it->second)[0];  } }
549                 else {  m->mothurOut("[ERROR]: trim.flows did not create a file file, quitting.\n"); m->control_pressed = true; break;  }
550             }else {
551                 vector<string> flowFiles;
552                 it = temp.find("flow");
553                 if (it != temp.end()) {  if ((it->second).size() != 0) { flowFiles = (it->second);  } }
554                 else {  m->mothurOut("[ERROR]: trim.flows did not create a flow file, quitting.\n"); m->control_pressed = true; break;  }
555                 
556                 for (int i = 0; i < flowFiles.size(); i++) {
557                     string end = flowFiles[i].substr(flowFiles[i].length()-9);
558                     if (end == "trim.flow") {
559                         flowFile = flowFiles[i]; i+=flowFiles.size(); //if we found the trim.flow file stop looking
560                     }
561                 }
562             }
563             
564             if ((fileFileName == "") && (flowFile == "")) { m->mothurOut("[ERROR]: trim.flows did not create a file file or a trim.flow file, quitting.\n"); m->control_pressed = true; break;  }
565             
566             if (fileFileName != "") { inputString = "file=" + fileFileName; }
567             else { inputString = "flow=" + flowFile; }
568             
569             inputString += ", lookup=" + lookupFileName + ", cutoff=" + toString(cutoff); + ", maxiters=" + toString(maxIters);
570             if (large) { inputString += ", large=" + toString(largeSize); }
571             inputString += ", sigma=" +toString(sigma);
572             inputString += ", mindelta=" + toString(minDelta);  
573             inputString += ", order=" + flowOrder + ", processors=1";
574             
575             //run shhh.flows
576             m->mothurOutEndLine(); 
577             m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine(); 
578             m->mothurCalling = true;
579             
580             Command* shhhFlowCommand = new ShhherCommand(inputString);
581             shhhFlowCommand->execute();
582             
583             if (m->control_pressed){ break; }
584             
585             temp = shhhFlowCommand->getOutputFiles();
586             mergeOutputFileList(filenames, temp);
587             
588             delete shhhFlowCommand;
589             m->mothurCalling = false;
590             
591             vector<string> fastaFiles;
592             vector<string> nameFiles;
593             it = temp.find("fasta");
594             if (it != temp.end()) {  if ((it->second).size() != 0) { fastaFiles = (it->second);  } }
595             else {  m->mothurOut("[ERROR]: shhh.flows did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
596            
597             it = temp.find("name");
598             if (it != temp.end()) {  if ((it->second).size() != 0) { nameFiles = (it->second);  } }
599             else {  m->mothurOut("[ERROR]: shhh.flows did not create a name file, quitting.\n"); m->control_pressed = true; break;  }
600             
601             //find fasta and name files with the shortest name.  This is because if there is a composite name it will be the shortest.
602             fastaFile = fastaFiles[0];
603             for (int i = 1; i < fastaFiles.size(); i++) { if (fastaFiles[i].length() < fastaFile.length()) { fastaFile = fastaFiles[i]; } }
604             string nameFile = nameFiles[0];
605             for (int i = 1; i < nameFiles.size(); i++) { if (nameFiles[i].length() < nameFile.length()) { nameFile = nameFiles[i]; } }
606             
607             inputString = "fasta=" + fastaFile + ", name=" + nameFile;
608             if (oligos != "") { inputString += ", oligos=" + oligos; }
609             if (allFiles) { inputString += ", allfiles=t"; }
610             else { inputString += ", allfiles=f";  }
611             if (flip) { inputString += ", flip=t"; }
612             else { inputString += ", flip=f";  }
613             if (keepforward) { inputString += ", keepforward=t"; }
614             else { inputString += ", keepforward=f";  }
615             
616             
617             inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
618             inputString += ", tdiffs=" + toString(tdiffs) + ", maxambig=" + toString(maxAmbig) + ", minlength=" + toString(minLength) + ", maxlength=" + toString(maxLength);
619             if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); }
620             if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); }
621             inputString += ", processors=1";
622             
623             //run trim.seqs
624             m->mothurOutEndLine(); 
625             m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
626             m->mothurCalling = true;
627             
628             Command* trimseqsCommand = new TrimSeqsCommand(inputString);
629             trimseqsCommand->execute();
630             
631             if (m->control_pressed){ break; }
632             
633             temp = trimseqsCommand->getOutputFiles();
634             mergeOutputFileList(filenames, temp);
635             
636             delete trimseqsCommand;
637             m->mothurCalling = false;
638             
639             it = temp.find("fasta");
640             if (it != temp.end()) {  if ((it->second).size() != 0) { fastaFiles = (it->second);  } }
641             else {  m->mothurOut("[ERROR]: trim.seqs did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
642             
643             for (int i = 0; i < fastaFiles.size(); i++) {
644                 string end = fastaFiles[i].substr(fastaFiles[i].length()-10);
645                 if (end == "trim.fasta") {
646                     fastaFile = fastaFiles[i]; i+=fastaFiles.size(); //if we found the trim.fasta file stop looking
647                 }
648             }
649             
650             it = temp.find("name");
651             if (it != temp.end()) {  if ((it->second).size() != 0) { nameFiles = (it->second);  } }
652             else {  m->mothurOut("[ERROR]: trim.seqs did not create a name file, quitting.\n"); m->control_pressed = true; break;  }
653             
654             for (int i = 0; i < nameFiles.size(); i++) {
655                 string end = nameFiles[i].substr(nameFiles[i].length()-10);
656                 if (end == "trim.names") {
657                     nameFile = nameFiles[i]; i+=nameFiles.size(); //if we found the trim.names file stop looking
658                 }
659             }
660             
661             vector<string> groupFiles;
662             string groupFile = "";
663             if (makeGroup) {
664                 it = temp.find("group");
665                 if (it != temp.end()) {  if ((it->second).size() != 0) { groupFiles = (it->second);  } }
666             
667                 //find group file with the shortest name.  This is because if there is a composite group file it will be the shortest.
668                 groupFile = groupFiles[0];
669                 for (int i = 1; i < groupFiles.size(); i++) { if (groupFiles[i].length() < groupFile.length()) { groupFile = groupFiles[i]; } }
670             }
671             
672             inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile;
673             m->mothurOutEndLine(); 
674             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
675             m->mothurCalling = true;
676             
677             summarySeqsCommand = new SeqSummaryCommand(inputString);
678             summarySeqsCommand->execute();
679             
680             if (m->control_pressed){ break; }
681             
682             temp = summarySeqsCommand->getOutputFiles();
683             mergeOutputFileList(filenames, temp);
684             
685             delete summarySeqsCommand;
686             m->mothurCalling = false;
687             
688             m->mothurOutEndLine(); 
689             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
690             
691             if (append) {
692                 m->appendFiles(fastaFile, fasta);
693                 m->appendFiles(nameFile, name);
694                 if (makeGroup) { m->appendFiles(groupFile, group);  }
695             }
696             count++;
697             
698             for (it = filenames.begin(); it != filenames.end(); it++) {
699                 for (int i = 0; i < (it->second).size(); i++) {
700                     outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]);
701                 }
702             }
703         }
704         
705         return count;
706     }
707         catch(exception& e) {
708                 m->errorOut(e, "SffMultipleCommand", "driver");
709                 exit(1);
710         }
711 }
712 //**********************************************************************************************************************
713 int SffMultipleCommand::mergeOutputFileList(map<string, vector<string> >& files, map<string, vector<string> >& temp){
714     try {
715         map<string, vector<string> >::iterator it;
716         for (it = temp.begin(); it != temp.end(); it++) {
717             map<string, vector<string> >::iterator it2 = files.find(it->first);
718             if (it2 == files.end()) { //we do not already have this type so just add it
719                 files[it->first] = it->second;
720             }else { //merge them
721                 for (int i = 0; i < (it->second).size(); i++) {
722                     files[it->first].push_back((it->second)[i]);
723                 }
724             }
725         }
726         
727         return 0;
728     }
729         catch(exception& e) {
730                 m->errorOut(e, "SffMultipleCommand", "mergeOutputFileList");
731                 exit(1);
732         }
733 }
734 //**********************************************************************************************************************
735 int SffMultipleCommand::createProcesses(vector<string> sffFiles, vector<string> oligosFiles, string fasta, string name, string group){
736     try {
737         vector<int> processIDS;
738                 int process = 1;
739                 int num = 0;
740                                 
741                 //divide the groups between the processors
742                 vector<linePair> lines;
743         vector<int> numFilesToComplete;
744                 int numFilesPerProcessor = sffFiles.size() / processors;
745                 for (int i = 0; i < processors; i++) {
746                         int startIndex =  i * numFilesPerProcessor;
747                         int endIndex = (i+1) * numFilesPerProcessor;
748                         if(i == (processors - 1)){      endIndex = sffFiles.size();     }
749                         lines.push_back(linePair(startIndex, endIndex));
750             numFilesToComplete.push_back((endIndex-startIndex));
751                 }
752                 
753 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
754                 
755                 //loop through and create all the processes you want
756                 while (process != processors) {
757                         int pid = fork();
758                         
759                         if (pid > 0) {
760                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
761                                 process++;
762                         }else if (pid == 0){
763                                 num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name  + toString(getpid()) + ".temp", group  + toString(getpid()) + ".temp");
764                 
765                 //pass numSeqs to parent
766                                 ofstream out;
767                                 string tempFile = toString(getpid()) + ".num.temp";
768                                 m->openOutputFile(tempFile, out);
769                                 out << num << '\t' << outputNames.size() << endl;
770                 for (int i = 0; i < outputNames.size(); i++) {  out << outputNames[i] << endl;  }
771                                 out.close();
772                 
773                                 exit(0);
774                         }else { 
775                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
776                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
777                                 exit(0);
778                         }
779                 }
780                 
781                 //do my part
782                 num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end, fasta, name, group);
783                 
784                 //force parent to wait until all the processes are done
785                 for (int i=0;i<processIDS.size();i++) { 
786                         int temp = processIDS[i];
787                         wait(&temp);
788                 }
789         
790         for (int i=0;i<processIDS.size();i++) { 
791             ifstream in;
792                         string tempFile = toString(processIDS[i]) + ".num.temp";
793                         m->openInputFile(tempFile, in);
794                         if (!in.eof()) { 
795                 int tempNum = 0; int outputNamesSize = 0; 
796                 in >> tempNum >> outputNamesSize; m->gobble(in);
797                 for (int j = 0; j < outputNamesSize; j++) {
798                     string tempName;
799                     in >> tempName; m->gobble(in);
800                     outputNames.push_back(tempName);
801                 }
802                 if (tempNum != numFilesToComplete[i+1]) {
803                     m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
804                 }
805             }
806                         in.close(); m->mothurRemove(tempFile);
807             
808             if (append) {
809                 m->appendFiles(fasta+toString(processIDS[i])+".temp", fasta);   m->mothurRemove(fasta+toString(processIDS[i])+".temp");
810                 m->appendFiles(name+toString(processIDS[i])+".temp", name);     m->mothurRemove(name+toString(processIDS[i])+".temp");
811                 if (makeGroup) { m->appendFiles(group+toString(processIDS[i])+".temp", group);  m->mothurRemove(group+toString(processIDS[i])+".temp"); }
812             }
813         }
814 #endif
815         return 0;
816         
817     }
818         catch(exception& e) {
819                 m->errorOut(e, "ShhherCommand", "createProcesses");
820                 exit(1);
821         }
822 }
823 //**********************************************************************************************************************
824
825
826
827