]> git.donarmstrong.com Git - mothur.git/blob - sffmultiplecommand.cpp
added oligos, pdiffs, bdiffs, ldiffs, sdiffs, tiffs parameters to sffinfo to allow...
[mothur.git] / sffmultiplecommand.cpp
1 //
2 //  sffmultiplecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 8/14/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "sffmultiplecommand.h"
10 #include "sffinfocommand.h"
11 #include "seqsummarycommand.h"
12 #include "trimflowscommand.h"
13 #include "shhhercommand.h"
14 #include "trimseqscommand.h"
15
16
17 //**********************************************************************************************************************
18 vector<string> SffMultipleCommand::setParameters(){     
19         try {           
20                 CommandParameter pfile("file", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfile);
21         
22         //sffinfo
23                 CommandParameter ptrim("trim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(ptrim);
24         
25         //trim.flows
26                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
27                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows);
28                 CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows);
29                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
30                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
31         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
32                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
33         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
34                 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
35                 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
36                 CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
37
38         //shhh.flows
39         CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
40                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
41                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
42         CommandParameter plarge("large", "Number", "", "-1", "", "", "",false,false); parameters.push_back(plarge);
43                 CommandParameter psigma("sigma", "Number", "", "60", "", "", "",false,false); parameters.push_back(psigma);
44                 CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "",false,false); parameters.push_back(pmindelta);
45         
46         //trim.seqs parameters
47         CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles);
48         CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
49                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
50                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
51                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
52                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
53                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
54                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
55                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
56                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
57                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
58                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
59                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
60                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
61                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
62
63         
64         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
65                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
66                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
67                 
68                 vector<string> myArray;
69                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
70                 return myArray;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SffMultipleCommand", "setParameters");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 string SffMultipleCommand::getHelpString(){     
79         try {
80                 string helpString = "";
81                 helpString += "The sff.multiple command reads a file containing sff filenames and optional oligos filenames. It runs the files through sffinfo, trim.flows, shhh.flows and trim.seqs combining the results.\n";
82                 helpString += "The sff.multiple command parameters are: ";
83         vector<string> parameters = setParameters();
84         for (int i = 0; i < parameters.size()-1; i++) {
85             helpString += parameters[i] + ", ";
86         }
87         helpString += parameters[parameters.size()-1] + ".\n";
88                 helpString += "The file parameter allows you to enter the a file containing the list of sff files and optional oligos files.\n";
89         helpString += "The trim parameter allows you to indicate if you would like a sequences and quality scores generated by sffinfo trimmed to the clipQualLeft and clipQualRight values.  Default=True. \n";
90         helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
91                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
92                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
93                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
94                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
95                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
96                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
97         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
98                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
99                 helpString += "The qfile parameter allows you to provide a quality file.\n";
100                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
101                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
102                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
103                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
104                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
105                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
106                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
107                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
108                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
109                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
110                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
111
112                 helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
113                 helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
114                 return helpString;
115         }
116         catch(exception& e) {
117                 m->errorOut(e, "SffMultipleCommand", "getHelpString");
118                 exit(1);
119         }
120 }
121 //**********************************************************************************************************************
122 string SffMultipleCommand::getOutputFileNameTag(string type, string inputName=""){      
123         try {
124         string outputFileName = "";
125                 map<string, vector<string> >::iterator it;
126         
127         //is this a type this command creates
128         it = outputTypes.find(type);
129         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
130         else {
131             m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  
132         }
133         return outputFileName;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "SffMultipleCommand", "getOutputFileNameTag");
137                 exit(1);
138         }
139 }
140
141
142 //**********************************************************************************************************************
143 SffMultipleCommand::SffMultipleCommand(){       
144         try {
145                 abort = true; calledHelp = true; 
146                 setParameters();
147                 vector<string> tempOutNames;
148                 outputTypes["fasta"] = tempOutNames;
149                 outputTypes["flow"] = tempOutNames;
150                 outputTypes["qfile"] = tempOutNames;
151         }
152         catch(exception& e) {
153                 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
154                 exit(1);
155         }
156 }
157 //**********************************************************************************************************************
158
159 SffMultipleCommand::SffMultipleCommand(string option)  {
160         try {
161                 abort = false; calledHelp = false;   
162                 
163                 //allow user to run help
164                 if(option == "help") { help(); abort = true; calledHelp = true; }
165                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
166                 
167                 else {
168                         //valid paramters for this command
169                         vector<string> myArray = setParameters();
170                         
171                         OptionParser parser(option);
172                         map<string, string> parameters = parser.getParameters();
173                         
174                         ValidParameters validParameter;
175             map<string,string>::iterator it;
176             
177                         //check to make sure all parameters are valid for command
178                         for (it = parameters.begin(); it != parameters.end(); it++) { 
179                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
180                         }
181                         
182                         //initialize outputTypes
183                         vector<string> tempOutNames;
184                         outputTypes["fasta"] = tempOutNames;
185                         outputTypes["flow"] = tempOutNames;
186                         outputTypes["qfile"] = tempOutNames;
187                         
188                         //if the user changes the output directory command factory will send this info to us in the output parameter 
189                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
190                         
191                         //if the user changes the input directory command factory will send this info to us in the output parameter 
192                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
193                         if (inputDir == "not found"){   inputDir = "";          }
194                         else {
195                                 string path;
196                 it = parameters.find("file");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["file"] = inputDir + it->second;             }
202                                 }
203                         }
204             
205                         filename = validParameter.validFile(parameters, "file", true);
206             if (filename == "not open") { filename = ""; abort = true; }
207             else if (filename == "not found") { filename = "";  }
208                         
209                         string temp;
210                         temp = validParameter.validFile(parameters, "trim", false);                                     if (temp == "not found"){       temp = "T";                             }
211                         trim = m->isTrue(temp); 
212             
213             temp = validParameter.validFile(parameters, "minflows", false);     if (temp == "not found") { temp = "450"; }
214                         m->mothurConvert(temp, minFlows);  
215             
216                         temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
217                         m->mothurConvert(temp, maxFlows);  
218             
219             temp = validParameter.validFile(parameters, "maxhomop", false);             if (temp == "not found"){       temp = "9";             }
220                         m->mothurConvert(temp, maxHomoP);  
221             
222                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
223                         m->mothurConvert(temp, signal);  
224             
225                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
226                         m->mothurConvert(temp, noise);  
227             
228                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
229                         m->mothurConvert(temp, bdiffs);
230                         
231                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
232                         m->mothurConvert(temp, pdiffs);
233                         
234             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
235                         m->mothurConvert(temp, ldiffs);
236             
237             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
238                         m->mothurConvert(temp, sdiffs);
239                         
240                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
241                         m->mothurConvert(temp, tdiffs);
242                         
243                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
244             
245                         
246                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
247                         m->setProcessors(temp);
248                         m->mothurConvert(temp, processors);
249             
250                         flowOrder = validParameter.validFile(parameters, "order", false);
251                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
252                         else if(flowOrder.length() != 4){
253                                 m->mothurOut("The value of the order option must be four bases long\n");
254                         }
255             
256             temp = validParameter.validFile(parameters, "cutoff", false);       if (temp == "not found"){       temp = "0.01";          }
257                         m->mothurConvert(temp, cutoff); 
258                         
259                         temp = validParameter.validFile(parameters, "mindelta", false); if (temp == "not found"){       temp = "0.000001";      }
260                         m->mothurConvert(temp, minDelta); 
261             
262                         temp = validParameter.validFile(parameters, "maxiter", false);  if (temp == "not found"){       temp = "1000";          }
263                         m->mothurConvert(temp, maxIters); 
264             
265             temp = validParameter.validFile(parameters, "large", false);        if (temp == "not found"){       temp = "0";             }
266                         m->mothurConvert(temp, largeSize); 
267             if (largeSize != 0) { large = true; }
268             else { large = false;  }
269             if (largeSize < 0) {  m->mothurOut("The value of the large cannot be negative.\n"); }
270             
271                         temp = validParameter.validFile(parameters, "sigma", false);if (temp == "not found")    {       temp = "60";            }
272                         m->mothurConvert(temp, sigma); 
273             
274             temp = validParameter.validFile(parameters, "flip", false);
275                         if (temp == "not found")    {   flip = 0;       }
276                         else {  flip = m->isTrue(temp);         }
277                         
278                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
279                         m->mothurConvert(temp, maxAmbig);  
280                        
281                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
282                         m->mothurConvert(temp, minLength); 
283                         
284                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
285                         m->mothurConvert(temp, maxLength);
286                                                 
287                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
288                         m->mothurConvert(temp, qThreshold);
289                         
290                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
291                         qtrim = m->isTrue(temp);
292             
293                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
294                         convert(temp, qRollAverage);
295             
296                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
297                         convert(temp, qWindowAverage);
298             
299                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
300                         convert(temp, qWindowSize);
301             
302                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
303                         convert(temp, qWindowStep);
304             
305                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
306                         convert(temp, qAverage);
307             
308                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
309                         convert(temp, keepFirst);
310             
311                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
312                         convert(temp, removeLast);
313                         
314                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
315                         allFiles = m->isTrue(temp);
316             
317             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
318                         keepforward = m->isTrue(temp);
319                   
320             numFPrimers = 0;
321                         numRPrimers = 0;
322             numLinkers = 0;
323             numSpacers = 0;
324                 }
325         }
326         catch(exception& e) {
327                 m->errorOut(e, "SffMultipleCommand", "SffMultipleCommand");
328                 exit(1);
329         }
330 }
331 //**********************************************************************************************************************
332 int SffMultipleCommand::execute(){
333         try {
334                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
335                 
336                 vector<string> sffFiles, oligosFiles;
337         readFile(sffFiles, oligosFiles);
338         
339         if (m->control_pressed) { return 0; }
340         
341         if (sffFiles.size() < processors) { processors = sffFiles.size(); }
342     
343         if (processors == 1) { driver(sffFiles, oligosFiles, 0, sffFiles.size()); }
344         else { createProcesses(sffFiles, oligosFiles); } 
345                 
346                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } return 0; }
347                 
348                 //report output filenames
349                 m->mothurOutEndLine();
350                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
351                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
352                 m->mothurOutEndLine();
353         
354                 return 0;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "SffMultipleCommand", "execute");
358                 exit(1);
359         }
360 }
361 //**********************************************************************************************************************
362 int SffMultipleCommand::readFile(vector<string>& sffFiles, vector<string>& oligosFiles){
363         try {
364         
365         ifstream in;
366         m->openInputFile(filename, in);
367         
368         string oligos, sff;
369         while (!in.eof()) {
370             
371             if (m->control_pressed) { break; }
372             
373             in >> sff;
374             
375             //ignore file pairing
376             if(sff[0] == '#'){ while (!in.eof())        {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } m->gobble(in); }
377             else { //check for oligos file
378                 oligos = "";
379             
380                 // get rest of line in case there is a oligos filename
381                 while (!in.eof())       {       
382                     char c = in.get(); 
383                     if (c == 10 || c == 13){    break;  }
384                     else if (c == 32 || c == 9){;} //space or tab
385                     else {      oligos += c;  }
386                 } 
387             }
388             m->gobble(in);
389             
390             sffFiles.push_back(sff);
391             oligosFiles.push_back(oligos); //will push a blank if there is not an oligos for this sff file
392         }
393         in.close();
394         
395         return 0;
396     }
397         catch(exception& e) {
398                 m->errorOut(e, "SffMultipleCommand", "readFile");
399                 exit(1);
400         }
401 }
402 //**********************************************************************************************************************
403 int SffMultipleCommand::driver(vector<string> sffFiles, vector<string> oligosFiles, int start, int end){
404     try {
405         int count = 0;
406         for (int i = start; i < end; i++) {
407             string sff = sffFiles[i];
408             string oligos = oligosFiles[i];
409             
410             //run sff.info
411             string inputString = "sff=" + sff + ", flow=T";
412             if (trim) { inputString += ", trim=T"; }
413             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
414             m->mothurOut("Running command: sffinfo(" + inputString + ")"); m->mothurOutEndLine(); 
415             m->mothurCalling = true;
416             
417             Command* sffCommand = new SffInfoCommand(inputString);
418             sffCommand->execute();
419             
420             map<string, vector<string> > filenames = sffCommand->getOutputFiles();
421             
422             delete sffCommand;
423             m->mothurCalling = false;
424             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
425             
426             //run summary.seqs on the fasta file
427             string fastaFile = "";
428             map<string, vector<string> >::iterator it = filenames.find("fasta");
429             if (it != filenames.end()) {  if ((it->second).size() != 0) { fastaFile = (it->second)[0];  } }
430             else {  m->mothurOut("[ERROR]: sffinfo did not create a fasta file, quitting.\n"); m->control_pressed = true; break;  }
431             
432             inputString = "fasta=" + fastaFile;
433             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
434             m->mothurOut("Running command: summary.seqs(" + inputString + ")"); m->mothurOutEndLine(); 
435             m->mothurCalling = true;
436             
437             Command* summarySeqsCommand = new SeqSummaryCommand(inputString);
438             summarySeqsCommand->execute();
439             
440             delete summarySeqsCommand;
441             m->mothurCalling = false;
442             m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
443             
444             count++;
445         }
446         
447         return count;
448     }
449         catch(exception& e) {
450                 m->errorOut(e, "SffMultipleCommand", "driver");
451                 exit(1);
452         }
453 }
454 //**********************************************************************************************************************
455 int SffMultipleCommand::createProcesses(vector<string> sffFiles, vector<string> oligosFiles){
456     try {
457         vector<int> processIDS;
458                 int process = 1;
459                 int num = 0;
460                                 
461                 //divide the groups between the processors
462                 vector<linePair> lines;
463         vector<int> numFilesToComplete;
464                 int numFilesPerProcessor = sffFiles.size() / processors;
465                 for (int i = 0; i < processors; i++) {
466                         int startIndex =  i * numFilesPerProcessor;
467                         int endIndex = (i+1) * numFilesPerProcessor;
468                         if(i == (processors - 1)){      endIndex = sffFiles.size();     }
469                         lines.push_back(linePair(startIndex, endIndex));
470             numFilesToComplete.push_back((endIndex-startIndex));
471                 }
472                 
473 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)          
474                 
475                 //loop through and create all the processes you want
476                 while (process != processors) {
477                         int pid = fork();
478                         
479                         if (pid > 0) {
480                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
481                                 process++;
482                         }else if (pid == 0){
483                                 num = driver(sffFiles, oligosFiles, lines[process].start, lines[process].end);
484                 
485                 //pass numSeqs to parent
486                                 ofstream out;
487                                 string tempFile = toString(getpid()) + ".num.temp";
488                                 m->openOutputFile(tempFile, out);
489                                 out << num << endl;
490                                 out.close();
491                 
492                                 exit(0);
493                         }else { 
494                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
495                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
496                                 exit(0);
497                         }
498                 }
499                 
500                 //do my part
501                 num = driver(sffFiles, oligosFiles, lines[0].start, lines[0].end);
502                 
503                 //force parent to wait until all the processes are done
504                 for (int i=0;i<processIDS.size();i++) { 
505                         int temp = processIDS[i];
506                         wait(&temp);
507                 }
508         
509 #else
510         
511         //////////////////////////////////////////////////////////////////////////////////////////////////////
512                 //Windows version shared memory, so be careful when passing variables through the sffMultiplesData struct. 
513                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
514                 //////////////////////////////////////////////////////////////////////////////////////////////////////
515                 /*
516          vector<shhhFlowsData*> pDataArray; 
517          DWORD   dwThreadIdArray[processors-1];
518          HANDLE  hThreadArray[processors-1]; 
519          
520          //Create processor worker threads.
521          for( int i=0; i<processors-1; i++ ){
522          // Allocate memory for thread data.
523          string extension = "";
524          if (i != 0) { extension = toString(i) + ".temp"; }
525          
526          shhhFlowsData* tempFlow = new shhhFlowsData(filenames, (compositeFASTAFileName + extension), (compositeNamesFileName + extension), outputDir, flowOrder, jointLookUp, singleLookUp, m, lines[i].start, lines[i].end, cutoff, sigma, minDelta, maxIters, i);
527          pDataArray.push_back(tempFlow);
528          processIDS.push_back(i);
529          
530          hThreadArray[i] = CreateThread(NULL, 0, ShhhFlowsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
531          }
532          
533          //using the main process as a worker saves time and memory
534          //do my part
535          driver(filenames, compositeFASTAFileName, compositeNamesFileName, lines[processors-1].start, lines[processors-1].end);
536          
537          //Wait until all threads have terminated.
538          WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
539          
540          //Close all thread handles and free memory allocations.
541          for(int i=0; i < pDataArray.size(); i++){
542          for(int j=0; j < pDataArray[i]->outputNames.size(); j++){ outputNames.push_back(pDataArray[i]->outputNames[j]); }
543          CloseHandle(hThreadArray[i]);
544          delete pDataArray[i];
545          }
546          */
547 #endif
548         
549         for (int i=0;i<processIDS.size();i++) { 
550             ifstream in;
551                         string tempFile = toString(processIDS[i]) + ".num.temp";
552                         m->openInputFile(tempFile, in);
553                         if (!in.eof()) { 
554                 int tempNum = 0; 
555                 in >> tempNum; 
556                 if (tempNum != numFilesToComplete[i+1]) {
557                     m->mothurOut("[ERROR]: main process expected " + toString(processIDS[i]) + " to complete " + toString(numFilesToComplete[i+1]) + " files, and it only reported completing " + toString(tempNum) + ". This will cause file mismatches.  The flow files may be too large to process with multiple processors. \n");
558                 }
559             }
560                         in.close(); m->mothurRemove(tempFile);
561         }
562         
563         return 0;
564         
565     }
566         catch(exception& e) {
567                 m->errorOut(e, "ShhherCommand", "createProcesses");
568                 exit(1);
569         }
570 }
571 //**********************************************************************************************************************
572
573
574
575