]> git.donarmstrong.com Git - mothur.git/blob - sffmultiplecommand.cpp
added sff.multiple command. fixed issue with windows parallelization in chimera...
[mothur.git] / sffmultiplecommand.cpp
1 //
2 //  sffmultiplecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 8/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "sffmultiplecommand.h"
10
11
12
13 //**********************************************************************************************************************
14 vector<string> SffMultipleCommand::setParameters(){     
15         try {           
16                 CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfile);
17         
18         //sffinfo
19                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
20         
21         //trim.flows
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows);
24                 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
28                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
29         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
30                 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
31                 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
32                 CommandParameter porder("order", "String", "", "TACG", "", "", "",false,false); parameters.push_back(porder);
33
34         //shhh.flows
35         CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
36                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
37                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
38         CommandParameter plarge("large", "Number", "", "-1", "", "", "",false,false); parameters.push_back(plarge);
39                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
40                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
41         
42         //trim.seqs parameters
43         CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles);
44         CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
45                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
46                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
47                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
48                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
49         CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
50                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
51
52         
53         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
54                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
55                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
56                 
57                 vector<string> myArray;
58                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "SffMultipleCommand", "setParameters");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string SffMultipleCommand::getHelpString(){     
68         try {
69                 string helpString = "";
70                 helpString += "The sff.multiple command reads a file containing sff filenames and optional oligos filenames. It runs the files through sffinfo, trim.flows, shhh.flows and trim.seqs combining the results.\n";
71                 helpString += "The sff.multiple command parameters are: ";
72         vector<string> parameters = setParameters();
73         for (int i = 0; i < parameters.size()-1; i++) {
74             helpString += parameters[i] + ", ";
75         }
76         helpString += parameters[parameters.size()-1] + ".\n";
77                 helpString += "The file parameter allows you to enter the a file containing the list of sff files and optional oligos files.\n";
78         helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores generated by sffinfo trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
79         helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
80                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
81                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
82                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
83                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
84                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
85                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
86         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
87                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
88                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
89                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
90                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92
93                 helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
95                 return helpString;
96         }
97         catch(exception& e) {
98                 m->errorOut(e, "SffMultipleCommand", "getHelpString");
99                 exit(1);
100         }
101 }
102 //**********************************************************************************************************************
103 string SffMultipleCommand::getOutputFileNameTag(string type, string inputName=""){      
104         try {
105         string outputFileName = "";
106                 map<string, vector<string> >::iterator it;
107         
108         //is this a type this command creates
109         it = outputTypes.find(type);
110         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
111         else {
112             if (type == "fasta")            {   outputFileName =  "fasta";   }
113             else if (type == "name")    {   outputFileName =  "names";   }
114             else if (type == "group")        {   outputFileName =  "groups";   }
115             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
116         }
117         return outputFileName;
118         }
119         catch(exception& e) {
120                 m->errorOut(e, "SffMultipleCommand", "getOutputFileNameTag");
121                 exit(1);
122         }
123 }
124
125
126 //**********************************************************************************************************************
127 SffMultipleCommand::SffMultipleCommand(){       
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133         outputTypes["name"] = tempOutNames;
134         outputTypes["group"] = tempOutNames;
135                 outputTypes["flow"] = tempOutNames;
136                 outputTypes["qfile"] = tempOutNames;
137         }
138         catch(exception& e) {
139                 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
140                 exit(1);
141         }
142 }
143 //**********************************************************************************************************************
144
145 SffMultipleCommand::SffMultipleCommand(string option)  {
146         try {
147                 abort = false; calledHelp = false;  append=false; makeGroup=false;
148                 
149                 //allow user to run help
150                 if(option == "help") { help(); abort = true; calledHelp = true; }
151                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
152                 
153                 else {
154                         //valid paramters for this command
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string, string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161             map<string,string>::iterator it;
162             
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["flow"] = tempOutNames;
172                         outputTypes["qfile"] = tempOutNames;
173             outputTypes["name"] = tempOutNames;
174             outputTypes["group"] = tempOutNames;
175
176                         
177                         //if the user changes the output directory command factory will send this info to us in the output parameter 
178                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
179                         
180                         //if the user changes the input directory command factory will send this info to us in the output parameter 
181                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
182                         if (inputDir == "not found"){   inputDir = "";          }
183                         else {
184                                 string path;
185                 it = parameters.find("file");
186                                 //user has given a template file
187                                 if(it != parameters.end()){ 
188                                         path = m->hasPath(it->second);
189                                         //if the user has not given a path then, add inputdir. else leave path alone.
190                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
191                                 }
192                 
193                 it = parameters.find("lookup");
194                                 //user has given a template file
195                                 if(it != parameters.end()){ 
196                                         path = m->hasPath(it->second);
197                                         //if the user has not given a path then, add inputdir. else leave path alone.
198                                         if (path == "") {       parameters["lookup"] = inputDir + it->second;           }
199                                 }
200                         }
201             
202                         filename = validParameter.validFile(parameters, "file", true);
203             if (filename == "not open") { filename = ""; abort = true; }
204             else if (filename == "not found") { filename = "";  }
205                         
206                         string temp;
207                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
208                         trim = m->isTrue(temp); 
209             
210             temp = validParameter.validFile(parameters, "minflows", false);     if (temp == "not found") { temp = "450"; }
211                         m->mothurConvert(temp, minFlows);  
212             
213                         temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
214                         m->mothurConvert(temp, maxFlows);  
215             
216             temp = validParameter.validFile(parameters, "maxhomop", false);             if (temp == "not found"){       temp = "9";             }
217                         m->mothurConvert(temp, maxHomoP);  
218             
219                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
220                         m->mothurConvert(temp, signal);  
221             
222                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
223                         m->mothurConvert(temp, noise);  
224             
225                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
226                         m->mothurConvert(temp, bdiffs);
227                         
228                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
229                         m->mothurConvert(temp, pdiffs);
230                         
231             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
232                         m->mothurConvert(temp, ldiffs);
233             
234             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
235                         m->mothurConvert(temp, sdiffs);
236                         
237                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
238                         m->mothurConvert(temp, tdiffs);
239                         
240                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
241             
242                         
243                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
244                         m->setProcessors(temp);
245                         m->mothurConvert(temp, processors);
246             
247                         flowOrder = validParameter.validFile(parameters, "order", false);
248                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
249                         else if(flowOrder.length() != 4){
250                                 m->mothurOut("The value of the order option must be four bases long\n");
251                         }
252             
253             temp = validParameter.validFile(parameters, "cutoff", false);       if (temp == "not found"){       temp = "0.01";          }
254                         m->mothurConvert(temp, cutoff); 
255                         
256                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
257                         minDelta = temp; 
258             
259                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
260                         m->mothurConvert(temp, maxIters); 
261             
262             temp = validParameter.validFile(parameters, "large", false);        if (temp == "not found"){       temp = "0";             }
263                         m->mothurConvert(temp, largeSize); 
264             if (largeSize != 0) { large = true; }
265             else { large = false;  }
266             if (largeSize < 0) {  m->mothurOut("The value of the large cannot be negative.\n"); }
267             
268                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
269                         m->mothurConvert(temp, sigma); 
270             
271             temp = validParameter.validFile(parameters, "flip", false);
272                         if (temp == "not found")    {   flip = 0;       }
273                         else {  flip = m->isTrue(temp);         }
274                         
275                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
276                         m->mothurConvert(temp, maxAmbig);  
277                        
278                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
279                         m->mothurConvert(temp, minLength); 
280                         
281                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
282                         m->mothurConvert(temp, maxLength);
283                                                 
284                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
285                         convert(temp, keepFirst);
286             
287                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
288                         convert(temp, removeLast);
289                         
290                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
291                         allFiles = m->isTrue(temp);
292             
293             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
294                         keepforward = m->isTrue(temp);
295             
296             temp = validParameter.validFile(parameters, "lookup", true);
297                         if (temp == "not found")        {       
298                                 lookupFileName = "LookUp_Titanium.pat"; 
299                                 
300                                 int ableToOpen;
301                                 ifstream in;
302                                 ableToOpen = m->openInputFile(lookupFileName, in, "noerror");
303                                 in.close();     
304                                 
305                                 //if you can't open it, try input location
306                                 if (ableToOpen == 1) {
307                                         if (inputDir != "") { //default path is set
308                                                 string tryPath = inputDir + lookupFileName;
309                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying input directory " + tryPath); m->mothurOutEndLine();
310                                                 ifstream in2;
311                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
312                                                 in2.close();
313                                                 lookupFileName = tryPath;
314                                         }
315                                 }
316                                 
317                                 //if you can't open it, try default location
318                                 if (ableToOpen == 1) {
319                                         if (m->getDefaultPath() != "") { //default path is set
320                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(lookupFileName);
321                                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying default " + tryPath); m->mothurOutEndLine();
322                                                 ifstream in2;
323                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
324                                                 in2.close();
325                                                 lookupFileName = tryPath;
326                                         }
327                                 }
328                                 
329                                 //if you can't open it its not in current working directory or inputDir, try mothur excutable location
330                                 if (ableToOpen == 1) {
331                                         string exepath = m->argv;
332                                         string tempPath = exepath;
333                                         for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
334                                         exepath = exepath.substr(0, (tempPath.find_last_of('m')));
335                                         
336                                         string tryPath = m->getFullPathName(exepath) + m->getSimpleName(lookupFileName);
337                                         m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
338                                         ifstream in2;
339                                         ableToOpen = m->openInputFile(tryPath, in2, "noerror");
340                                         in2.close();
341                                         lookupFileName = tryPath;
342                                 }
343                                 
344                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
345                         }
346                         else if(temp == "not open")     {       
347                                 
348                                 lookupFileName = validParameter.validFile(parameters, "lookup", false);
349                                 
350                                 //if you can't open it its not inputDir, try mothur excutable location
351                                 string exepath = m->argv;
352                                 string tempPath = exepath;
353                                 for (int i = 0; i < exepath.length(); i++) { tempPath[i] = tolower(exepath[i]); }
354                                 exepath = exepath.substr(0, (tempPath.find_last_of('m')));
355                 
356                                 string tryPath = m->getFullPathName(exepath) + lookupFileName;
357                                 m->mothurOut("Unable to open " + lookupFileName + ". Trying mothur's executable location " + tryPath); m->mothurOutEndLine();
358                                 ifstream in2;
359                                 int ableToOpen = m->openInputFile(tryPath, in2, "noerror");
360                                 in2.close();
361                                 lookupFileName = tryPath;
362                                 
363                                 if (ableToOpen == 1) {  m->mothurOut("Unable to open " + lookupFileName + "."); m->mothurOutEndLine(); abort=true;  }
364                         }else                                           {       lookupFileName = temp;  }
365                 }
366         }
367         catch(exception& e) {
368                 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
369                 exit(1);
370         }
371 }
372 //**********************************************************************************************************************
373 int SffMultipleCommand::execute(){
374         try {
375                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
376                 
377                 vector<string> sffFiles, oligosFiles;
378         readFile(sffFiles, oligosFiles);
379         
380         outputDir = m->hasPath(filename);
381         string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
382         string fasta = fileroot + getOutputFileNameTag("fasta");
383         string name = fileroot + getOutputFileNameTag("name");
384         string group = fileroot + getOutputFileNameTag("group");
385         
386         if (m->control_pressed) { return 0; }
387         
388         if (sffFiles.size() < processors) { processors = sffFiles.size(); }
389         
390 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
391 #else
392         //trim.flows, shhh.flows cannot handle multiple processors for windows.
393         processors = 1; m->mothurOut("This command can only use 1 processor on Windows platforms, using 1 processors.\n\n");
394 #endif
395         if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size(), fasta, name, group); }
396         else { createProcesses(sffFiles, oligosFiles, fasta, name, group); } 
397                 
398                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
399                 
400         if (append) { 
401             outputNames.push_back(fasta); outputTypes["fasta"].push_back(fasta);
402             m->setFastaFile(fasta);
403             outputNames.push_back(name); outputTypes["name"].push_back(name);
404             m->setNameFile(name);
405             if (makeGroup) { outputNames.push_back(group); outputTypes["group"].push_back(group); m->setGroupFile(group); }
406         }
407         
408                 //report output filenames
409                 m->mothurOutEndLine();
410                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
411                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
412                 m->mothurOutEndLine();
413         
414                 return 0;
415         }
416         catch(exception& e) {
417                 m->errorOut(e, "SffMultipleCommand", "execute");
418                 exit(1);
419         }
420 }
421 //**********************************************************************************************************************
422 int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligosFiles){
423         try {
424         
425         ifstream in;
426         m->openInputFile(filename, in);
427         bool allBlank = true;
428         bool allFull = true;
429         
430         string oligos, sff;
431         while (!in.eof()) {
432             
433             if (m->control_pressed) { break; }
434             
435             in >> sff;
436             
437             sff = m->getFullPathName(sff);
438             
439             //ignore file pairing
440             if(sff[0] == '#'){ while (!in.eof())        {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } m->gobble(in); }
441             else { //check for oligos file
442                 oligos = "";
443             
444                 // get rest of line in case there is a oligos filename
445                 while (!in.eof())       {       
446                     char c = in.get(); 
447                     if (c == 10 || c == 13){    break;  }
448                     else if (c == 32 || c == 9){;} //space or tab
449                     else {      oligos += c;  }
450                 } 
451                 sffFiles.push_back(sff);
452                 if (oligos != "") { oligos = m->getFullPathName(oligos); allBlank = false;  }
453                 if (oligos == "") { allFull = false;  }
454                 oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
455             }
456             m->gobble(in);
457         }
458         in.close();
459         
460         if (allBlank || allFull) { append = true; }
461         if (allFull) { makeGroup = true; }
462         
463         return 0;
464     }
465         catch(exception& e) {
466                 m->errorOut(e, "SffMultipleCommand", "readFile");
467                 exit(1);
468         }
469 }
470 //**********************************************************************************************************************
471 //runs sffinfo, summary.seqs, trim.flows, shhh.flows, trim.seqs, summary.seqs for each sff file.
472 int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFiles, int start, int end, string fasta, string name, string group){
473     try {
474         m->mothurRemove(fasta); m->mothurRemove(name); m->mothurRemove(group);
475         int count = 0;
476         for (int s = start; s < end; s++) {
477             
478             string sff = sffFiles[s];
479             string oligos = oligosFiles[s];
480             
481             m->mothurOut("\n>>>>>\tProcessing " + sff + " (file " + toString(s+1) + " of " + toString(sffFiles.size()) + ")\t<<<<<\n");
482             
483             //run sff.info
484             string inputString = "sff=" + sff + ", flow=T";
485             if (trim) { inputString += ", trim=T"; }
486             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
487             m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine(); 
488             m->mothurCalling = true;
489             
490             Command* sffCommand = new SffInfoCommand(inputString);
491             sffCommand->execute();
492             
493             if (m->control_pressed){ break; }
494             
495             map<string, vector<string> > filenames = sffCommand->getOutputFiles();
496             
497             delete sffCommand;
498             m->mothurCalling = false;
499             m->mothurOutEndLine(); 
500             
501             //run summary.seqs on the fasta file
502             string fastaFile = "";
503             map<string, vector<string> >::iterator it = filenames.find("fasta");
504             if (it != filenames.end()) {  if ((it->second).size() != 0) { fastaFile = (it->second)[0];  } }
505             else {  m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
506             
507             inputString = "fasta=" + fastaFile + ", processors=1";
508             m->mothurOutEndLine(); 
509             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
510             m->mothurCalling = true;
511             
512             Command* summarySeqsCommand = new SeqSummaryCommand(inputString);
513             summarySeqsCommand->execute();
514             
515             if (m->control_pressed){ break; }
516             
517             map<string, vector<string> > temp = summarySeqsCommand->getOutputFiles();
518             mergeOutputFileList(filenames, temp);
519             
520             delete summarySeqsCommand;
521             m->mothurCalling = false;
522             
523             m->mothurOutEndLine(); 
524             
525             //run trim.flows on the fasta file
526             string flowFile = "";
527             it = filenames.find("flow");
528             if (it != filenames.end()) {  if ((it->second).size() != 0) { flowFile = (it->second)[0];  } }
529             else {  m->mothurOut("[ERROR]: sffinfo did not create a flow file, quitting.\n"); m->control_pressed = true; break;  }
530             
531             inputString = "flow=" + flowFile;
532             if (oligos != "") { inputString += ", oligos=" + oligos; }
533             inputString += ", maxhomop=" + toString(maxHomoP) + ", maxflows=" + toString(maxFlows) + ", minflows=" + toString(minFlows);
534             inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
535             inputString += ", tdiffs=" + toString(tdiffs) + ", signal=" + toString(signal) + ", noise=" + toString(noise) + ", order=" + flowOrder + ", processors=1";
536             
537             m->mothurOutEndLine(); 
538             m->mothurOut("Running command: trim.flows(" + inputString + ")"); m->mothurOutEndLine(); 
539             m->mothurCalling = true;
540             
541             Command* trimFlowCommand = new TrimFlowsCommand(inputString);
542             trimFlowCommand->execute();
543             
544             if (m->control_pressed){ break; }
545             
546             temp = trimFlowCommand->getOutputFiles();
547             mergeOutputFileList(filenames, temp);
548             
549             delete trimFlowCommand;
550             m->mothurCalling = false;
551             
552             
553             string fileFileName = "";
554             flowFile = "";
555             if (oligos != "") { 
556                 it = temp.find("file");
557                 if (it != temp.end()) {  if ((it->second).size() != 0) { fileFileName = (it->second)[0];  } }
558                 else {  m->mothurOut("[ERROR]: trim.flows did not create a file file, quitting.\n"); m->control_pressed = true; break;  }
559             }else {
560                 vector<string> flowFiles;
561                 it = temp.find("flow");
562                 if (it != temp.end()) {  if ((it->second).size() != 0) { flowFiles = (it->second);  } }
563                 else {  m->mothurOut("[ERROR]: trim.flows did not create a flow file, quitting.\n"); m->control_pressed = true; break;  }
564                 
565                 for (int i = 0; i < flowFiles.size(); i++) {
566                     string end = flowFiles[i].substr(flowFiles[i].length()-9);
567                     if (end == "trim.flow") {
568                         flowFile = flowFiles[i]; i+=flowFiles.size(); //if we found the trim.flow file stop looking
569                     }
570                 }
571             }
572             
573             if ((fileFileName == "") && (flowFile == "")) { m->mothurOut("[ERROR]: trim.flows did not create a file file or a trim.flow file, quitting.\n"); m->control_pressed = true; break;  }
574             
575             if (fileFileName != "") { inputString = "file=" + fileFileName; }
576             else { inputString = "flow=" + flowFile; }
577             
578             inputString += ", lookup=" + lookupFileName + ", cutoff=" + toString(cutoff); + ", maxiters=" + toString(maxIters);
579             if (large) { inputString += ", large=" + toString(largeSize); }
580             inputString += ", sigma=" +toString(sigma);
581             inputString += ", mindelta=" + toString(minDelta);  
582             inputString += ", order=" + flowOrder + ", processors=1";
583             
584             //run shhh.flows
585             m->mothurOutEndLine(); 
586             m->mothurOut("Running command: shhh.flows(" + inputString + ")"); m->mothurOutEndLine(); 
587             m->mothurCalling = true;
588             
589             Command* shhhFlowCommand = new ShhherCommand(inputString);
590             shhhFlowCommand->execute();
591             
592             if (m->control_pressed){ break; }
593             
594             temp = shhhFlowCommand->getOutputFiles();
595             mergeOutputFileList(filenames, temp);
596             
597             delete shhhFlowCommand;
598             m->mothurCalling = false;
599             
600             vector<string> fastaFiles;
601             vector<string> nameFiles;
602             it = temp.find("fasta");
603             if (it != temp.end()) {  if ((it->second).size() != 0) { fastaFiles = (it->second);  } }
604             else {  m->mothurOut("[ERROR]: shhh.flows did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
605            
606             it = temp.find("name");
607             if (it != temp.end()) {  if ((it->second).size() != 0) { nameFiles = (it->second);  } }
608             else {  m->mothurOut("[ERROR]: shhh.flows did not create a name file, quitting.\n"); m->control_pressed = true; break;  }
609             
610             //find fasta and name files with the shortest name.  This is because if there is a composite name it will be the shortest.
611             fastaFile = fastaFiles[0];
612             for (int i = 1; i < fastaFiles.size(); i++) { if (fastaFiles[i].length() < fastaFile.length()) { fastaFile = fastaFiles[i]; } }
613             string nameFile = nameFiles[0];
614             for (int i = 1; i < nameFiles.size(); i++) { if (nameFiles[i].length() < nameFile.length()) { nameFile = nameFiles[i]; } }
615             
616             inputString = "fasta=" + fastaFile + ", name=" + nameFile;
617             if (oligos != "") { inputString += ", oligos=" + oligos; }
618             if (allFiles) { inputString += ", allfiles=t"; }
619             else { inputString += ", allfiles=f";  }
620             if (flip) { inputString += ", flip=t"; }
621             else { inputString += ", flip=f";  }
622             if (keepforward) { inputString += ", keepforward=t"; }
623             else { inputString += ", keepforward=f";  }
624             
625             
626             inputString += ", pdiffs=" + toString(pdiffs) + ", bdiffs=" + toString(bdiffs) + ", ldiffs=" + toString(ldiffs) + ", sdiffs=" + toString(sdiffs);
627             inputString += ", tdiffs=" + toString(tdiffs) + ", maxambig=" + toString(maxAmbig) + ", minlength=" + toString(minLength) + ", maxlength=" + toString(maxLength);
628             if (keepFirst != 0) { inputString += ", keepfirst=" + toString(keepFirst); }
629             if (removeLast != 0) { inputString += ", removelast=" + toString(removeLast); }
630             inputString += ", processors=1";
631             
632             //run trim.seqs
633             m->mothurOutEndLine(); 
634             m->mothurOut("Running command: trim.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
635             m->mothurCalling = true;
636             
637             Command* trimseqsCommand = new TrimSeqsCommand(inputString);
638             trimseqsCommand->execute();
639             
640             if (m->control_pressed){ break; }
641             
642             temp = trimseqsCommand->getOutputFiles();
643             mergeOutputFileList(filenames, temp);
644             
645             delete trimseqsCommand;
646             m->mothurCalling = false;
647             
648             it = temp.find("fasta");
649             if (it != temp.end()) {  if ((it->second).size() != 0) { fastaFiles = (it->second);  } }
650             else {  m->mothurOut("[ERROR]: trim.seqs did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
651             
652             for (int i = 0; i < fastaFiles.size(); i++) {
653                 string end = fastaFiles[i].substr(fastaFiles[i].length()-10);
654                 if (end == "trim.fasta") {
655                     fastaFile = fastaFiles[i]; i+=fastaFiles.size(); //if we found the trim.fasta file stop looking
656                 }
657             }
658             
659             it = temp.find("name");
660             if (it != temp.end()) {  if ((it->second).size() != 0) { nameFiles = (it->second);  } }
661             else {  m->mothurOut("[ERROR]: trim.seqs did not create a name file, quitting.\n"); m->control_pressed = true; break;  }
662             
663             for (int i = 0; i < nameFiles.size(); i++) {
664                 string end = nameFiles[i].substr(nameFiles[i].length()-10);
665                 if (end == "trim.names") {
666                     nameFile = nameFiles[i]; i+=nameFiles.size(); //if we found the trim.names file stop looking
667                 }
668             }
669             
670             vector<string> groupFiles;
671             string groupFile = "";
672             if (makeGroup) {
673                 it = temp.find("group");
674                 if (it != temp.end()) {  if ((it->second).size() != 0) { groupFiles = (it->second);  } }
675             
676                 //find group file with the shortest name.  This is because if there is a composite group file it will be the shortest.
677                 groupFile = groupFiles[0];
678                 for (int i = 1; i < groupFiles.size(); i++) { if (groupFiles[i].length() < groupFile.length()) { groupFile = groupFiles[i]; } }
679             }
680             
681             inputString = "fasta=" + fastaFile + ", processors=1, name=" + nameFile;
682             m->mothurOutEndLine(); 
683             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
684             m->mothurCalling = true;
685             
686             summarySeqsCommand = new SeqSummaryCommand(inputString);
687             summarySeqsCommand->execute();
688             
689             if (m->control_pressed){ break; }
690             
691             temp = summarySeqsCommand->getOutputFiles();
692             mergeOutputFileList(filenames, temp);
693             
694             delete summarySeqsCommand;
695             m->mothurCalling = false;
696             
697             m->mothurOutEndLine(); 
698             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
699             
700             if (append) {
701                 m->appendFiles(fastaFile, fasta);
702                 m->appendFiles(nameFile, name);
703                 if (makeGroup) { m->appendFiles(groupFile, group);  }
704             }
705             count++;
706             
707             for (it = filenames.begin(); it != filenames.end(); it++) {
708                 for (int i = 0; i < (it->second).size(); i++) {
709                     outputNames.push_back((it->second)[i]); outputTypes[it->first].push_back((it->second)[i]);
710                 }
711             }
712         }
713         
714         return count;
715     }
716         catch(exception& e) {
717                 m->errorOut(e, "SffMultipleCommand", "driver");
718                 exit(1);
719         }
720 }
721 //**********************************************************************************************************************
722 int SffMultipleCommand::mergeOutputFileList(map<string, vector<string> >& files, map<string, vector<string> >& temp){
723     try {
724         map<string, vector<string> >::iterator it;
725         for (it = temp.begin(); it != temp.end(); it++) {
726             map<string, vector<string> >::iterator it2 = files.find(it->first);
727             if (it2 == files.end()) { //we do not already have this type so just add it
728                 files[it->first] = it->second;
729             }else { //merge them
730                 for (int i = 0; i < (it->second).size(); i++) {
731                     files[it->first].push_back((it->second)[i]);
732                 }
733             }
734         }
735         
736         return 0;
737     }
738         catch(exception& e) {
739                 m->errorOut(e, "SffMultipleCommand", "mergeOutputFileList");
740                 exit(1);
741         }
742 }
743 //**********************************************************************************************************************
744 int SffMultipleCommand::createProcesses(vector<string> sffFiles, vector<string> oligosFiles, string fasta, string name, string group){
745     try {
746         vector<int> processIDS;
747                 int process = 1;
748                 int num = 0;
749                                 
750                 //divide the groups between the processors
751                 vector<linePair> lines;
752         vector<int> numFilesToComplete;
753                 int numFilesPerProcessor = sffFiles.size() / processors;
754                 for (int i = 0; i < processors; i++) {
755                         int startIndex =  i * numFilesPerProcessor;
756                         int endIndex = (i+1) * numFilesPerProcessor;
757                         if(i == (processors - 1)){      endIndex = sffFiles.size();     }
758                         lines.push_back(linePair(startIndex, endIndex));
759             numFilesToComplete.push_back((endIndex-startIndex));
760                 }
761                 
762 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
763                 
764                 //loop through and create all the processes you want
765                 while (process != processors) {
766                         int pid = fork();
767                         
768                         if (pid > 0) {
769                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
770                                 process++;
771                         }else if (pid == 0){
772                                 num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end, fasta + toString(getpid()) + ".temp", name  + toString(getpid()) + ".temp", group  + toString(getpid()) + ".temp");
773                 
774                 //pass numSeqs to parent
775                                 ofstream out;
776                                 string tempFile = toString(getpid()) + ".num.temp";
777                                 m->openOutputFile(tempFile, out);
778                                 out << num << '\t' << outputNames.size() << endl;
779                 for (int i = 0; i < outputNames.size(); i++) {  out << outputNames[i] << endl;  }
780                                 out.close();
781                 
782                                 exit(0);
783                         }else { 
784                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
785                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
786                                 exit(0);
787                         }
788                 }
789                 
790                 //do my part
791                 num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end, fasta, name, group);
792                 
793                 //force parent to wait until all the processes are done
794                 for (int i=0;i<processIDS.size();i++) { 
795                         int temp = processIDS[i];
796                         wait(&temp);
797                 }
798         
799         for (int i=0;i<processIDS.size();i++) { 
800             ifstream in;
801                         string tempFile = toString(processIDS[i]) + ".num.temp";
802                         m->openInputFile(tempFile, in);
803                         if (!in.eof()) { 
804                 int tempNum = 0; int outputNamesSize = 0; 
805                 in >> tempNum >> outputNamesSize; m->gobble(in);
806                 for (int j = 0; j < outputNamesSize; j++) {
807                     string tempName;
808                     in >> tempName; m->gobble(in);
809                     outputNames.push_back(tempName);
810                 }
811                 if (tempNum != numFilesToComplete[i+1]) {
812                     m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
813                 }
814             }
815                         in.close(); m->mothurRemove(tempFile);
816             
817             if (append) {
818                 m->appendFiles(fasta+toString(processIDS[i])+".temp", fasta);   m->mothurRemove(fasta+toString(processIDS[i])+".temp");
819                 m->appendFiles(name+toString(processIDS[i])+".temp", name);     m->mothurRemove(name+toString(processIDS[i])+".temp");
820                 if (makeGroup) { m->appendFiles(group+toString(processIDS[i])+".temp", group);  m->mothurRemove(group+toString(processIDS[i])+".temp"); }
821             }
822         }
823 #endif
824         return 0;
825         
826     }
827         catch(exception& e) {
828                 m->errorOut(e, "ShhherCommand", "createProcesses");
829                 exit(1);
830         }
831 }
832 //**********************************************************************************************************************
833
834
835
836