]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.cpp
working on windows paralellization, added trimOligos class to be used by trim.flows...
[mothur.git] / trimflowscommand.cpp
1 /*
2  *  trimflowscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimflowscommand.h"
11 #include "needlemanoverlap.hpp"
12
13
14 //**********************************************************************************************************************
15 vector<string> TrimFlowsCommand::setParameters(){       
16         try {
17                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
20                 CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows);
21                 CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows);
22                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
23                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
24                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
25                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26                 CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
27                 CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
28                 CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles);
29                 CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
30                 CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta);
31                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
32                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
33                 
34                 vector<string> myArray;
35                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
36                 return myArray;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "TrimFlowsCommand", "setParameters");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 string TrimFlowsCommand::getHelpString(){       
45         try {
46                 string helpString = "";
47                 helpString += "The trim.flows command reads a flowgram file and creates .....\n";
48                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
49                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "TrimFlowsCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57
58 //**********************************************************************************************************************
59
60 TrimFlowsCommand::TrimFlowsCommand(){   
61         try {
62                 abort = true; calledHelp = true; 
63                 setParameters();
64                 vector<string> tempOutNames;
65                 outputTypes["flow"] = tempOutNames;
66                 outputTypes["fasta"] = tempOutNames;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74
75 TrimFlowsCommand::TrimFlowsCommand(string option)  {
76         try {
77                 
78                 abort = false; calledHelp = false;   
79                 comboStarts = 0;
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
84                 
85                 else {
86                                                 
87                         vector<string> myArray = setParameters();
88                         
89                         OptionParser parser(option);
90                         map<string,string> parameters = parser.getParameters();
91                         
92                         ValidParameters validParameter;
93                         map<string,string>::iterator it;
94                         
95                         //check to make sure all parameters are valid for command
96                         for (it = parameters.begin(); it != parameters.end(); it++) { 
97                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
98                         }
99                         
100                         //initialize outputTypes
101                         vector<string> tempOutNames;
102                         outputTypes["flow"] = tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         
105                         //if the user changes the input directory command factory will send this info to us in the output parameter 
106                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
107                         if (inputDir == "not found"){   inputDir = "";          }
108                         else {
109                                 string path;
110                                 it = parameters.find("flow");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
116                                 }
117                                 
118                                 it = parameters.find("oligos");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
124                                 }
125                                 
126                         }
127                         
128                         
129                         //check for required parameters
130                         flowFileName = validParameter.validFile(parameters, "flow", true);
131                         if (flowFileName == "not found") { 
132                                 flowFileName = m->getFlowFile(); 
133                                 if (flowFileName != "") {  m->mothurOut("Using " + flowFileName + " as input file for the flow parameter."); m->mothurOutEndLine(); }
134                                 else { 
135                                         m->mothurOut("No valid current flow file. You must provide a flow file."); m->mothurOutEndLine(); 
136                                         abort = true;
137                                 } 
138                         }else if (flowFileName == "not open") { flowFileName = ""; abort = true; }      
139                         
140                         //if the user changes the output directory command factory will send this info to us in the output parameter 
141                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
142                                 outputDir = ""; 
143                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
144                         }
145                         
146                         
147                         //check for optional parameter and set defaults
148                         // ...at some point should added some additional type checking...
149                         
150                         string temp;
151                         temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
152                         convert(temp, minFlows);  
153
154                         temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
155                         convert(temp, maxFlows);  
156                         
157                         
158                         temp = validParameter.validFile(parameters, "oligos", true);
159                         if (temp == "not found")        {       oligoFileName = "";             }
160                         else if(temp == "not open")     {       abort = true;                   } 
161                         else                                            {       oligoFileName = temp;   m->setOligosFile(oligoFileName); }
162                         
163                         temp = validParameter.validFile(parameters, "fasta", false);            if (temp == "not found"){       fasta = 0;              }
164                         else if(m->isTrue(temp))        {       fasta = 1;      }
165                         
166                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found"){       temp = "9";             }
167                         convert(temp, maxHomoP);  
168
169                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
170                         convert(temp, signal);  
171
172                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
173                         convert(temp, noise);  
174         
175                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
176                         convert(temp, bdiffs);
177                         
178                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
179                         convert(temp, pdiffs);
180                         
181                         temp = validParameter.validFile(parameters, "tdiffs", false);
182                         if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
183                         convert(temp, tdiffs);
184                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
185                         
186                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
187                         m->setProcessors(temp);
188                         convert(temp, processors);
189         
190                         flowOrder = validParameter.validFile(parameters, "order", false);
191                         if (flowOrder == "not found"){ flowOrder = "TACG";              }
192                         else if(flowOrder.length() != 4){
193                                 m->mothurOut("The value of the order option must be four bases long\n");
194                         }
195
196                         if(oligoFileName == "") {       allFiles = 0;           }
197                         else                                    {       allFiles = 1;           }
198
199                         numFPrimers = 0;
200                         numRPrimers = 0;
201                 }
202                 
203         }
204         catch(exception& e) {
205                 m->errorOut(e, "TrimFlowsCommand", "TrimSeqsCommand");
206                 exit(1);
207         }
208 }
209
210 //***************************************************************************************************************
211
212 int TrimFlowsCommand::execute(){
213         try{
214                 
215                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
216
217                 string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim.flow";
218                 outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName);
219                 
220                 string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap.flow";
221                 outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName);
222
223                 string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.fasta";
224                 if(fasta){
225                         outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
226                 }
227                 
228                 vector<unsigned long long> flowFilePos;
229         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
230                 flowFilePos = getFlowFileBreaks();
231                 for (int i = 0; i < (flowFilePos.size()-1); i++) {
232                         lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
233                 }       
234         #else
235                 ifstream in; m->openInputFile(flowFileName, in); in >> numFlows; in.close();
236         ///////////////////////////////////////// until I fix multiple processors for windows //////////////////        
237                 processors = 1;
238         ///////////////////////////////////////// until I fix multiple processors for windows //////////////////                
239                 if (processors == 1) {
240                         lines.push_back(new linePair(0, 1000));
241                 }else {
242                         int numFlowLines;
243                         flowFilePos = m->setFilePosEachLine(flowFileName, numFlowLines);
244                         flowFilePos.erase(flowFilePos.begin() + 1); numFlowLines--;
245                         
246                         //figure out how many sequences you have to process
247                         int numSeqsPerProcessor = numFlowLines / processors;
248                         cout << numSeqsPerProcessor << '\t' << numFlowLines << endl;
249                         for (int i = 0; i < processors; i++) {
250                                 int startIndex =  i * numSeqsPerProcessor;
251                                 if(i == (processors - 1)){      numSeqsPerProcessor = numFlowLines - i * numSeqsPerProcessor;   }
252                                 lines.push_back(new linePair(flowFilePos[startIndex], numSeqsPerProcessor));
253                                 cout << flowFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
254                         }
255                 }
256         #endif
257                 
258                 vector<vector<string> > barcodePrimerComboFileNames;
259                 if(oligoFileName != ""){
260                         getOligos(barcodePrimerComboFileNames); 
261                 }
262                 
263                 if(processors == 1){
264                         driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
265                 }else{
266                         createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
267                 }       
268                 
269                 if (m->control_pressed) {  return 0; }                  
270                 
271                 string flowFilesFileName;
272                 ofstream output;
273                 
274                 if(allFiles){
275                         
276                         flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
277                         m->openOutputFile(flowFilesFileName, output);
278
279                         for(int i=0;i<barcodePrimerComboFileNames.size();i++){
280                                 for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
281                                         
282                                         FILE * pFile;
283                                         unsigned long long size;
284                                         
285                                         //get num bytes in file
286                                         pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
287                                         if (pFile==NULL) perror ("Error opening file");
288                                         else{
289                                                 fseek (pFile, 0, SEEK_END);
290                                                 size=ftell(pFile);
291                                                 fclose (pFile);
292                                         }
293
294                                         if(size < 10){
295                                                 m->mothurRemove(barcodePrimerComboFileNames[i][j]);
296                                         }
297                                         else{
298                                                 output << barcodePrimerComboFileNames[i][j] << endl;
299                                                 outputNames.push_back(barcodePrimerComboFileNames[i][j]);
300                                                 outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
301                                         }
302                                 }
303                         }
304                         output.close();
305                 }
306                 else{
307                         flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
308                         m->openOutputFile(flowFilesFileName, output);
309                         
310                         output << trimFlowFileName << endl;
311                         
312                         output.close();
313                 }
314                 outputTypes["flow.files"].push_back(flowFilesFileName);
315                 outputNames.push_back(flowFilesFileName);
316                 
317 //              set fasta file as new current fastafile
318 //              string current = "";
319 //              itTypes = outputTypes.find("fasta");
320 //              if (itTypes != outputTypes.end()) {
321 //                      if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
322 //              }
323                 
324                 m->mothurOutEndLine();
325                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
326                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
327                 m->mothurOutEndLine();
328                 
329                 return 0;       
330         }
331         catch(exception& e) {
332                 m->errorOut(e, "TrimSeqsCommand", "execute");
333                 exit(1);
334         }
335 }
336
337 //***************************************************************************************************************
338
339 int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > thisBarcodePrimerComboFileNames, linePair* line){
340         
341         try {
342                 ofstream trimFlowFile;
343                 m->openOutputFile(trimFlowFileName, trimFlowFile);
344                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
345
346                 ofstream scrapFlowFile;
347                 m->openOutputFile(scrapFlowFileName, scrapFlowFile);
348                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
349                 
350                 ofstream fastaFile;
351                 if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
352                 
353                 ifstream flowFile;
354                 m->openInputFile(flowFileName, flowFile);
355                 
356                 flowFile.seekg(line->start);
357                 
358                 if(line->start == 0){
359                         flowFile >> numFlows; m->gobble(flowFile);
360                         scrapFlowFile << maxFlows << endl;
361                         trimFlowFile << maxFlows << endl;
362                         if(allFiles){
363                                 for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
364                                         for(int j=0;j<thisBarcodePrimerComboFileNames[0].size();j++){
365                                                 ofstream temp;
366                                                 m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp);
367                                                 temp << maxFlows << endl;
368                                                 temp.close();
369                                         }
370                                 }                       
371                         }
372                 }
373                 
374                 FlowData flowData(numFlows, signal, noise, maxHomoP, flowOrder);
375                 //cout << " driver flowdata address " <<  &flowData  << &flowFile << endl;      
376                 int count = 0;
377                 bool moreSeqs = 1;
378                 
379                 TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
380                 
381                 while(moreSeqs) {
382                         //cout << "driver " << count << endl;
383                         
384         
385                         if (m->control_pressed) { break; }
386                         
387                         int success = 1;
388                         int currentSeqDiffs = 0;
389                         string trashCode = "";
390                         
391                         flowData.getNext(flowFile); 
392                         //cout << "driver good bit " << flowFile.good() << endl;        
393                         flowData.capFlows(maxFlows);    
394                         
395                         Sequence currSeq = flowData.getSequence();
396                         
397                         if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
398                                 success = 0;
399                                 trashCode += 'l';
400                         }
401                         
402                         int primerIndex = 0;
403                         int barcodeIndex = 0;
404                         
405                         if(barcodes.size() != 0){
406                                 success = trimOligos.stripBarcode(currSeq, barcodeIndex);
407                                 if(success > bdiffs)            {       trashCode += 'b';       }
408                                 else{ currentSeqDiffs += success;  }
409                         }
410                         
411                         if(numFPrimers != 0){
412                                 success = trimOligos.stripForward(currSeq, primerIndex);
413                                 if(success > pdiffs)            {       trashCode += 'f';       }
414                                 else{ currentSeqDiffs += success;  }
415                         }
416                         
417                         if (currentSeqDiffs > tdiffs)   {       trashCode += 't';   }
418                         
419                         if(numRPrimers != 0){
420                                 success = trimOligos.stripReverse(currSeq);
421                                 if(!success)                            {       trashCode += 'r';       }
422                         }
423                         
424                         if(trashCode.length() == 0){
425                                                         
426                                 flowData.printFlows(trimFlowFile);
427                         
428                                 if(fasta)       {       currSeq.printSequence(fastaFile);       }
429                                 
430                                 if(allFiles){
431                                         ofstream output;
432                                         m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
433                                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
434                                         
435                                         flowData.printFlows(output);
436                                         output.close();
437                                 }                               
438                         }
439                         else{
440                                 flowData.printFlows(scrapFlowFile, trashCode);
441                         }
442                                 
443                         count++;
444                         //cout << "driver" << '\t' << currSeq.getName() << endl;                        
445                         //report progress
446                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
447
448 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
449                         unsigned long long pos = flowFile.tellg();
450
451                         if ((pos == -1) || (pos >= line->end)) { break; }
452 #else
453                         if (flowFile.eof()) { break; }
454 #endif
455                         
456                 }
457                 //report progress
458                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
459                 
460                 trimFlowFile.close();
461                 scrapFlowFile.close();
462                 flowFile.close();
463                 if(fasta){      fastaFile.close();      }
464                 
465                 return count;
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
469                 exit(1);
470         }
471 }
472
473 //***************************************************************************************************************
474
475 void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
476         try {
477                 ifstream oligosFile;
478                 m->openInputFile(oligoFileName, oligosFile);
479                 
480                 string type, oligo, group;
481
482                 int indexPrimer = 0;
483                 int indexBarcode = 0;
484                 
485                 while(!oligosFile.eof()){
486                 
487                         oligosFile >> type; m->gobble(oligosFile);      //get the first column value of the row - is it a comment or a feature we are interested in?
488
489                         if(type[0] == '#'){     //igore the line because there's a comment
490                                 while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       } // get rest of line if there's any crap there
491                         }
492                         else{                           //there's a feature we're interested in
493
494                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }                                  //make type case insensitive
495
496                                 oligosFile >> oligo;    //get the DNA sequence for the feature
497
498                                 for(int i=0;i<oligo.length();i++){      //make type case insensitive and change any U's to T's
499                                         oligo[i] = toupper(oligo[i]);
500                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
501                                 }
502
503                                 if(type == "FORWARD"){  //if the feature is a forward primer...
504                                         group = "";
505
506                                         while (!oligosFile.eof())       {       // get rest of line in case there is a primer name = will have the name of the primer
507                                                 char c = oligosFile.get(); 
508                                                 if (c == 10 || c == 13){        break;  }
509                                                 else if (c == 32 || c == 9){;} //space or tab
510                                                 else {  group += c;  }
511                                         } 
512
513                                         //have we seen this primer already?
514                                         map<string, int>::iterator itPrimer = primers.find(oligo);
515                                         if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
516
517                                         primers[oligo]=indexPrimer; indexPrimer++;
518                                         primerNameVector.push_back(group);
519
520                                 }
521                                 else if(type == "REVERSE"){
522                                         Sequence oligoRC("reverse", oligo);
523                                         oligoRC.reverseComplement();
524                                         revPrimer.push_back(oligoRC.getUnaligned());
525                                 }
526                                 else if(type == "BARCODE"){
527                                         oligosFile >> group;
528
529                                         //check for repeat barcodes
530                                         map<string, int>::iterator itBar = barcodes.find(oligo);
531                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
532
533                                         barcodes[oligo]=indexBarcode; indexBarcode++;
534                                         barcodeNameVector.push_back(group);
535                                 }
536                                 else{
537                                         m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
538                                 }
539                         }
540
541                         m->gobble(oligosFile);
542                 }
543                 oligosFile.close();
544                 
545                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
546                 
547                 //add in potential combos
548                 if(barcodeNameVector.size() == 0){
549                         barcodes[""] = 0;
550                         barcodeNameVector.push_back("");                        
551                 }
552                 
553                 if(primerNameVector.size() == 0){
554                         primers[""] = 0;
555                         primerNameVector.push_back("");                 
556                 }
557                 
558                 
559                 outFlowFileNames.resize(barcodeNameVector.size());
560                 for(int i=0;i<outFlowFileNames.size();i++){
561                         outFlowFileNames[i].assign(primerNameVector.size(), "");
562                 }
563                 
564                 if(allFiles){
565
566                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
567                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
568
569                                         string primerName = primerNameVector[itPrimer->second];
570                                         string barcodeName = barcodeNameVector[itBar->second];
571                                                                                 
572                                         string comboGroupName = "";
573                                         string fileName = "";
574                                         
575                                         if(primerName == ""){
576                                                 comboGroupName = barcodeNameVector[itBar->second];
577                                                 fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
578                                         }
579                                         else{
580                                                 if(barcodeName == ""){
581                                                         comboGroupName = primerNameVector[itPrimer->second];
582                                                 }
583                                                 else{
584                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
585                                                 }
586                                                 fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
587                                         }
588                                         
589                                         outFlowFileNames[itBar->second][itPrimer->second] = fileName;
590                                         
591                                         ofstream temp;
592                                         m->openOutputFile(fileName, temp);
593                                         temp.close();
594                                 }
595                         }
596                 }
597                 
598                 numFPrimers = primers.size();
599                 numRPrimers = revPrimer.size();
600                 
601         }
602         catch(exception& e) {
603                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
604                 exit(1);
605         }
606 }
607 /**************************************************************************************************/
608 vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
609
610         try{
611                         
612                 vector<unsigned long long> filePos;
613                 filePos.push_back(0);
614                                         
615                 FILE * pFile;
616                 unsigned long long size;
617                 
618                 //get num bytes in file
619                 pFile = fopen (flowFileName.c_str(),"rb");
620                 if (pFile==NULL) perror ("Error opening file");
621                 else{
622                         fseek (pFile, 0, SEEK_END);
623                         size=ftell (pFile);
624                         fclose (pFile);
625                 }
626                                 
627                 //estimate file breaks
628                 unsigned long long chunkSize = 0;
629                 chunkSize = size / processors;
630
631                 //file too small to divide by processors
632                 if (chunkSize == 0)  {  processors = 1; filePos.push_back(size); return filePos;        }
633                 
634                 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
635                 for (int i = 0; i < processors; i++) {
636                         unsigned long long spot = (i+1) * chunkSize;
637                         
638                         ifstream in;
639                         m->openInputFile(flowFileName, in);
640                         in.seekg(spot);
641                         
642                         string dummy = m->getline(in);
643                         
644                         //there was not another sequence before the end of the file
645                         unsigned long long sanityPos = in.tellg();
646                         
647 //                      if (sanityPos == -1) {  break;  }
648 //                      else {  filePos.push_back(newSpot);  }
649                         if (sanityPos == -1) {  break;  }
650                         else {  filePos.push_back(sanityPos);  }
651                         
652                         in.close();
653                 }
654                 
655                 //save end pos
656                 filePos.push_back(size);
657                 
658                 //sanity check filePos
659                 for (int i = 0; i < (filePos.size()-1); i++) {
660                         if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
661                 }
662
663                 ifstream in;
664                 m->openInputFile(flowFileName, in);
665                 in >> numFlows;
666                 m->gobble(in);
667                 //unsigned long long spot = in.tellg();
668                 //filePos[0] = spot;
669                 in.close();
670                 
671                 processors = (filePos.size() - 1);
672                 
673                 return filePos; 
674         }
675         catch(exception& e) {
676                 m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks");
677                 exit(1);
678         }
679 }
680
681 /**************************************************************************************************/
682
683 int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
684
685         try {
686                 processIDS.clear();
687                 int exitCommand = 1;
688                 
689 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
690                 int process = 1;
691                 
692                 //loop through and create all the processes you want
693                 while (process != processors) {
694                         int pid = fork();
695                         
696                         if (pid > 0) {
697                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
698                                 process++;
699                         }else if (pid == 0){
700                                 
701                                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
702                                 if(allFiles){
703                                         for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
704                                                 for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
705                                                         tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
706                                                         ofstream temp;
707                                                         m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
708                                                         temp.close();
709                                                         
710                                                 }
711                                         }
712                                 }
713                                 driverCreateTrim(flowFileName,
714                                                                  (trimFlowFileName + toString(getpid()) + ".temp"),
715                                                                  (scrapFlowFileName + toString(getpid()) + ".temp"),
716                                                                  (fastaFileName + toString(getpid()) + ".temp"),
717                                                                  tempBarcodePrimerComboFileNames, lines[process]);
718
719                                 exit(0);
720                         }else { 
721                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
722                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
723                                 exit(0);
724                         }
725                 }
726                 
727                 //parent do my part
728                 ofstream temp;
729                 m->openOutputFile(trimFlowFileName, temp);
730                 temp.close();
731
732                 m->openOutputFile(scrapFlowFileName, temp);
733                 temp.close();
734                 
735                 if(fasta){
736                         m->openOutputFile(fastaFileName, temp);
737                         temp.close();
738                 }
739                 
740                 driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
741
742                 //force parent to wait until all the processes are done
743                 for (int i=0;i<processIDS.size();i++) { 
744                         int temp = processIDS[i];
745                         wait(&temp);
746                 }
747 #else
748                 //////////////////////////////////////////////////////////////////////////////////////////////////////
749                 //Windows version shared memory, so be careful when passing variables through the trimFlowData struct. 
750                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
751                 //////////////////////////////////////////////////////////////////////////////////////////////////////
752                 
753                 vector<trimFlowData*> pDataArray; 
754                 DWORD   dwThreadIdArray[processors-1];
755                 HANDLE  hThreadArray[processors-1]; 
756                 
757                 //Create processor worker threads.
758                 for( int i=0; i<processors-1; i++ ){
759                         // Allocate memory for thread data.
760                         string extension = "";
761                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
762                         
763                         vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
764                         if(allFiles){
765                                 for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
766                                         for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
767                                                 tempBarcodePrimerComboFileNames[i][j] += extension;
768                                                 ofstream temp;
769                                                 m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
770                                                 temp.close();
771                                                 
772                                         }
773                                 }
774                         }
775                         
776                         trimFlowData* tempflow = new trimFlowData(flowFileName, (trimFlowFileName + extension), (scrapFlowFileName + extension), fastaFileName, flowOrder, tempBarcodePrimerComboFileNames, barcodes, primers, revPrimer, fasta, allFiles, lines[i]->start, lines[i]->end, m, signal, noise, numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, i);
777                         pDataArray.push_back(tempflow);
778                         
779                         //MyTrimFlowThreadFunction is in header. It must be global or static to work with the threads.
780                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
781                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimFlowThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
782                 }
783                 
784                 //using the main process as a worker saves time and memory
785                 ofstream temp;
786                 m->openOutputFile(trimFlowFileName, temp);
787                 temp.close();
788                 
789                 m->openOutputFile(scrapFlowFileName, temp);
790                 temp.close();
791                 
792                 if(fasta){
793                         m->openOutputFile(fastaFileName, temp);
794                         temp.close();
795                 }
796                 
797                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
798                 if(allFiles){
799                         for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
800                                 for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
801                                         tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp";
802                                         ofstream temp;
803                                         m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
804                                         temp.close();
805                                         
806                                 }
807                         }
808                 }
809                 
810                 //do my part - do last piece because windows is looking for eof
811                 int num = driverCreateTrim(flowFileName, (trimFlowFileName  + toString(processors-1) + ".temp"), (scrapFlowFileName  + toString(processors-1) + ".temp"), (fastaFileName + toString(processors-1) + ".temp"), tempBarcodePrimerComboFileNames, lines[processors-1]);
812                 processIDS.push_back((processors-1)); 
813                 
814                 //Wait until all threads have terminated.
815                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
816                 
817                 //Close all thread handles and free memory allocations.
818                 for(int i=0; i < pDataArray.size(); i++){
819                         num += pDataArray[i]->count;
820                         CloseHandle(hThreadArray[i]);
821                         delete pDataArray[i];
822                 }
823                 
824                 
825 #endif  
826                 //append files
827                 m->mothurOutEndLine();
828                 for(int i=0;i<processIDS.size();i++){
829                         
830                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
831                         
832                         m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
833                         m->mothurRemove((trimFlowFileName + toString(processIDS[i]) + ".temp"));
834 //                      m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
835
836                         m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
837                         m->mothurRemove((scrapFlowFileName + toString(processIDS[i]) + ".temp"));
838 //                      m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
839
840                         if(fasta){
841                                 m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
842                                 m->mothurRemove((fastaFileName + toString(processIDS[i]) + ".temp"));
843 //                              m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
844                         }
845                         if(allFiles){                                           
846                                 for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
847                                         for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
848                                                 m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
849                                                 m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"));
850                                         }
851                                 }
852                         }
853                 }
854                 
855                 return exitCommand;
856         
857         }
858         catch(exception& e) {
859                 m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");
860                 exit(1);
861         }
862 }
863
864 //***************************************************************************************************************