]> git.donarmstrong.com Git - mothur.git/blob - trimflowscommand.cpp
removed various build warnings
[mothur.git] / trimflowscommand.cpp
1 /*
2  *  trimflowscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 12/22/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "trimflowscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimFlowsCommand::getValidParameters(){  
16         try {
17                 string Array[] =  {"flow", "totalflows", "minflows",
18                         "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise"
19                         "oligos", "pdiffs", "bdiffs", "tdiffs", 
20                         "allfiles", "processors",
21
22
23                         
24 //                      "group",
25                         "outputdir","inputdir"
26                 
27                 };
28                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "TrimFlowsCommand", "getValidParameters");
33                 exit(1);
34         }
35 }
36
37 //**********************************************************************************************************************
38
39 vector<string> TrimFlowsCommand::getRequiredParameters(){       
40         try {
41                 string Array[] =  {"flow"};
42                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "TrimFlowsCommand", "getRequiredParameters");
47                 exit(1);
48         }
49 }
50
51 //**********************************************************************************************************************
52
53 vector<string> TrimFlowsCommand::getRequiredFiles(){    
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "TrimFlowsCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63
64 //**********************************************************************************************************************
65
66 TrimFlowsCommand::TrimFlowsCommand(){   
67         try {
68                 abort = true;
69                 //initialize outputTypes
70                 vector<string> tempOutNames;
71                 outputTypes["flow"] = tempOutNames;
72                 outputTypes["fasta"] = tempOutNames;
73         }
74         catch(exception& e) {
75                 m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
76                 exit(1);
77         }
78 }
79
80 //***************************************************************************************************************
81
82 TrimFlowsCommand::~TrimFlowsCommand(){  /*      do nothing      */      }
83
84 //***************************************************************************************************************
85
86 void TrimFlowsCommand::help(){
87         try {
88                 m->mothurOut("The trim.flows command reads a flowgram file and creates .....\n");
89                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
90                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n\n");
91                 
92         }
93         catch(exception& e) {
94                 m->errorOut(e, "TrimFlowsCommand", "help");
95                 exit(1);
96         }
97 }
98
99 //**********************************************************************************************************************
100
101 TrimFlowsCommand::TrimFlowsCommand(string option)  {
102         try {
103                 
104                 abort = false;
105                 comboStarts = 0;
106                 
107                 //allow user to run help
108                 if(option == "help") { help(); abort = true; }
109                 
110                 else {
111                         //valid paramters for this command
112                         string AlignArray[] =  {"flow", "totalflows", "minflows",
113                                 "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise",
114                                 "oligos", "pdiffs", "bdiffs", "tdiffs", 
115                                 "allfiles", "processors",
116                 
117                                 //                      "group",
118                                 "outputdir","inputdir"
119                                 
120                         };
121                         
122                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
123                         
124                         OptionParser parser(option);
125                         map<string,string> parameters = parser.getParameters();
126                         
127                         ValidParameters validParameter;
128                         map<string,string>::iterator it;
129                         
130                         //check to make sure all parameters are valid for command
131                         for (it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         //initialize outputTypes
136                         vector<string> tempOutNames;
137                         outputTypes["flow"] = tempOutNames;
138                         outputTypes["fasta"] = tempOutNames;
139                         
140                         //if the user changes the input directory command factory will send this info to us in the output parameter 
141                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
142                         if (inputDir == "not found"){   inputDir = "";          }
143                         else {
144                                 string path;
145                                 it = parameters.find("flow");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["flow"] = inputDir + it->second;             }
151                                 }
152                                 
153                                 it = parameters.find("oligos");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
159                                 }
160                                 
161                                 
162 //                              it = parameters.find("group");
163 //                              //user has given a template file
164 //                              if(it != parameters.end()){ 
165 //                                      path = m->hasPath(it->second);
166 //                                      //if the user has not given a path then, add inputdir. else leave path alone.
167 //                                      if (path == "") {       parameters["group"] = inputDir + it->second;            }
168 //                              }
169                         }
170                         
171                         
172                         //check for required parameters
173                         flowFileName = validParameter.validFile(parameters, "flow", true);
174                         if (flowFileName == "not found") { m->mothurOut("flow is a required parameter for the trim.flows command."); m->mothurOutEndLine(); abort = true; }
175                         else if (flowFileName == "not open") { abort = true; }  
176                         
177                         //if the user changes the output directory command factory will send this info to us in the output parameter 
178                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
179                                 outputDir = ""; 
180                                 outputDir += m->hasPath(flowFileName); //if user entered a file with a path then preserve it    
181                         }
182                         
183                         
184                         //check for optional parameter and set defaults
185                         // ...at some point should added some additional type checking...
186                         
187                         string temp;
188                         temp = validParameter.validFile(parameters, "minflows", false);         if (temp == "not found") { temp = "360"; }
189                         convert(temp, minFlows);  
190
191                         temp = validParameter.validFile(parameters, "totalflows", false);       if (temp == "not found") { temp = "720"; }
192                         convert(temp, totalFlows);  
193                         
194                         
195                         temp = validParameter.validFile(parameters, "oligos", true);
196                         if (temp == "not found")        {       oligoFileName = "";             }
197                         else if(temp == "not open")     {       abort = true;                   } 
198                         else                                            {       oligoFileName = temp;   }
199                         
200                         temp = validParameter.validFile(parameters, "fasta", false);            if (temp == "not found"){       fasta = 0;              }
201                         else if(m->isTrue(temp))        {       fasta = 1;      }
202                         
203                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found"){       temp = "9";             }
204                         convert(temp, maxHomoP);  
205
206                         temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
207                         convert(temp, signal);  
208
209                         temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
210                         convert(temp, noise);  
211
212                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found"){       temp = "0";             }
213                         convert(temp, minLength); 
214                         
215                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found"){       temp = "0";             }
216                         convert(temp, maxLength);
217                         
218                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
219                         convert(temp, bdiffs);
220                         
221                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
222                         convert(temp, pdiffs);
223                         
224                         temp = validParameter.validFile(parameters, "tdiffs", false);
225                         if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
226                         convert(temp, tdiffs);
227                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
228                         
229                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "T";          }
230                         allFiles = m->isTrue(temp);
231                         
232                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
233                         convert(temp, processors); 
234                         
235                         if(oligoFileName == ""){        allFiles = 0;           }
236
237                         numFPrimers = 0;
238                         numRPrimers = 0;
239                 }
240                 
241         }
242         catch(exception& e) {
243                 m->errorOut(e, "TrimFlowsCommand", "TrimSeqsCommand");
244                 exit(1);
245         }
246 }
247
248 //***************************************************************************************************************
249
250 int TrimFlowsCommand::execute(){
251         try{
252                 
253                 if (abort == true) { return 0; }
254
255                 string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim.flow";
256                 outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName);
257                 
258                 string scrapFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "scrap.flow";
259                 outputNames.push_back(scrapFlowFileName); outputTypes["flow"].push_back(scrapFlowFileName);
260
261                 string fastaFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.fasta";
262                 if(fasta){
263                         outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
264                 }
265                 
266                 vector<unsigned long int> flowFilePos = getFlowFileBreaks();
267                 for (int i = 0; i < (flowFilePos.size()-1); i++) {
268                         lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
269                 }       
270
271                 vector<vector<string> > barcodePrimerComboFileNames;
272                 if(oligoFileName != ""){
273                         getOligos(barcodePrimerComboFileNames); 
274                 }
275                 
276 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
277                 if(processors == 1){
278                         driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
279                 }else{
280                         createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
281                 }       
282 #else
283                 driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
284 #endif
285                 
286                 if (m->control_pressed) {  return 0; }                  
287                 
288                 if(allFiles){
289                         
290                         ofstream output;
291                         string flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
292                         m->openOutputFile(flowFilesFileName, output);
293
294                         for(int i=0;i<barcodePrimerComboFileNames.size();i++){
295                                 for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
296                                         
297                                         FILE * pFile;
298                                         unsigned long int size;
299                                         
300                                         //get num bytes in file
301                                         pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
302                                         if (pFile==NULL) perror ("Error opening file");
303                                         else{
304                                                 fseek (pFile, 0, SEEK_END);
305                                                 size=ftell (pFile);
306                                                 fclose (pFile);
307                                         }
308
309                                         if(size < 10){
310 //                                              m->mothurOut("deleting: " + barcodePrimerComboFileNames[i][j] + '\n');
311                                                 remove(barcodePrimerComboFileNames[i][j].c_str());
312                                         }
313                                         else{
314 //                                              m->mothurOut("saving: " + barcodePrimerComboFileNames[i][j] + '\n');
315                                                 output << barcodePrimerComboFileNames[i][j] << endl;
316                                         }
317                                 }
318                         }
319                         output.close();
320                 }
321                 
322                 m->mothurOutEndLine();
323                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
324                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
325                 m->mothurOutEndLine();
326                 
327                 return 0;       
328         }
329         catch(exception& e) {
330                 m->errorOut(e, "TrimSeqsCommand", "execute");
331                 exit(1);
332         }
333 }
334
335 //***************************************************************************************************************
336
337 int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames, linePair* line){
338         
339         try {
340                 
341                 ofstream trimFlowFile;
342                 m->openOutputFile(trimFlowFileName, trimFlowFile);
343                 trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
344
345                 ofstream scrapFlowFile;
346                 m->openOutputFile(scrapFlowFileName, scrapFlowFile);
347                 scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
348
349                 if(line->start == 4){
350                         scrapFlowFile << totalFlows << endl;
351                         trimFlowFile << totalFlows << endl;
352                         for(int i=0;i<barcodePrimerComboFileNames.size();i++){
353                                 for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
354                                         //                              barcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
355                                         ofstream temp;
356                                         m->openOutputFile(barcodePrimerComboFileNames[i][j], temp);
357                                         temp << totalFlows << endl;
358                                         temp.close();
359                                 }
360                         }                       
361                 }
362                 
363                 FlowData flowData(numFlows, signal, noise, maxHomoP);
364                 
365                 ofstream fastaFile;
366                 if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
367                 
368                 ifstream flowFile;
369                 m->openInputFile(flowFileName, flowFile);
370                 
371                 flowFile.seekg(line->start);
372
373                 int count = 0;
374                 bool moreSeqs = 1;
375                 
376                 while(moreSeqs) {
377                         
378                         int success = 1;
379                         int currentSeqDiffs = 0;
380                         string trashCode = "";
381                         
382                         flowData.getNext(flowFile);
383                         flowData.capFlows(totalFlows);  
384                         
385                         Sequence currSeq = flowData.getSequence();
386                         if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
387                                 success = 0;
388                                 trashCode += 'l';
389                         }
390                         
391                         if(minLength > 0 || maxLength > 0){     //screen to see if sequence is above and below a specific number of bases
392                                 int seqLength = currSeq.getNumBases();
393                                 if(seqLength < minLength || seqLength > maxLength){
394                                         success = 0;
395                                         trashCode += 'l';
396                                 }
397                         }
398                         
399                         int primerIndex, barcodeIndex;
400                         
401                         if(barcodes.size() != 0){
402                                 success = stripBarcode(currSeq, barcodeIndex);
403                                 if(success > bdiffs)            {       trashCode += 'b';       }
404                                 else{ currentSeqDiffs += success;  }
405                         }
406                         
407                         if(numFPrimers != 0){
408                                 success = stripForward(currSeq, primerIndex);
409                                 if(success > pdiffs)            {       trashCode += 'f';       }
410                                 else{ currentSeqDiffs += success;  }
411                         }
412                         
413                         if (currentSeqDiffs > tdiffs)   {       trashCode += 't';   }
414                         
415                         if(numRPrimers != 0){
416                                 success = stripReverse(currSeq);
417                                 if(!success)                            {       trashCode += 'r';       }
418                         }
419                                                 
420                                 
421                         if(trashCode.length() == 0){
422                                 flowData.printFlows(trimFlowFile);
423                                 
424                                 if(fasta){
425                                         currSeq.printSequence(fastaFile);
426                                 }
427                                 
428                                 if(barcodes.size() != 0 || primers.size() != 0){
429 //                                      string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow";
430                                         ofstream output;
431                                         m->openOutputFileAppend(barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
432                                         output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
433                                         
434                                         flowData.printFlows(output);
435                                         output.close();
436                                 }
437                                 
438                         }
439                         else{
440                                 flowData.printFlows(scrapFlowFile, trashCode);
441                         }
442                                 
443                         count++;
444                                                 
445                         //report progress
446                         if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
447
448 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
449                         unsigned long int pos = flowFile.tellg();
450
451                         if ((pos == -1) || (pos >= line->end)) { break; }
452 #else
453                         if (flowFile.eof()) { break; }
454 #endif
455                         
456                 }
457                 //report progress
458                 if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
459                 
460                 trimFlowFile.close();
461                 scrapFlowFile.close();
462                 flowFile.close();
463                 if(fasta){      fastaFile.close();      }
464                 
465                 return count;
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
469                 exit(1);
470         }
471 }
472
473 //***************************************************************************************************************
474
475 void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
476         try {
477                 ifstream oligosFile;
478                 m->openInputFile(oligoFileName, oligosFile);
479                 
480                 string type, oligo, group;
481
482                 int indexPrimer = 0;
483                 int indexBarcode = 0;
484                 
485                 while(!oligosFile.eof()){
486                 
487                         oligosFile >> type; m->gobble(oligosFile);      //get the first column value of the row - is it a comment or a feature we are interested in?
488
489                         if(type[0] == '#'){     //igore the line because there's a comment
490                                 while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       } // get rest of line if there's any crap there
491                         }
492                         else{                           //there's a feature we're interested in
493
494                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }                                  //make type case insensitive
495
496                                 oligosFile >> oligo;    //get the DNA sequence for the feature
497
498                                 for(int i=0;i<oligo.length();i++){      //make type case insensitive and change any U's to T's
499                                         oligo[i] = toupper(oligo[i]);
500                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
501                                 }
502
503                                 if(type == "FORWARD"){  //if the feature is a forward primer...
504                                         group = "";
505
506                                         while (!oligosFile.eof())       {       // get rest of line in case there is a primer name = will have the name of the primer
507                                                 char c = oligosFile.get(); 
508                                                 if (c == 10 || c == 13){        break;  }
509                                                 else if (c == 32 || c == 9){;} //space or tab
510                                                 else {  group += c;  }
511                                         } 
512
513                                         //have we seen this primer already?
514                                         map<string, int>::iterator itPrimer = primers.find(oligo);
515                                         if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
516
517                                         primers[oligo]=indexPrimer; indexPrimer++;
518                                         primerNameVector.push_back(group);
519
520                                 }
521                                 else if(type == "REVERSE"){
522                                         Sequence oligoRC("reverse", oligo);
523                                         oligoRC.reverseComplement();
524                                         revPrimer.push_back(oligoRC.getUnaligned());
525                                 }
526                                 else if(type == "BARCODE"){
527                                         oligosFile >> group;
528
529                                         //check for repeat barcodes
530                                         map<string, int>::iterator itBar = barcodes.find(oligo);
531                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
532
533                                         barcodes[oligo]=indexBarcode; indexBarcode++;
534                                         barcodeNameVector.push_back(group);
535                                 }
536                                 else{
537                                         m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
538                                 }
539                         }
540
541                         m->gobble(oligosFile);
542                 }
543                 oligosFile.close();
544                 
545                 //add in potential combos       
546                 outFlowFileNames.resize(barcodeNameVector.size());
547                 for(int i=0;i<outFlowFileNames.size();i++){
548                         outFlowFileNames[i].assign(primerNameVector.size(), "");
549                 }
550                 
551                 if(allFiles){
552                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
553                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
554
555                                         string primerName = primerNameVector[itPrimer->second];
556                                         string barcodeName = barcodeNameVector[itBar->second];
557                                                                                 
558                                         string comboGroupName = "";
559                                         string fileName = "";
560                                         
561                                         if(primerName == ""){
562                                                 comboGroupName = barcodeNameVector[itBar->second];
563                                                 fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
564                                         }
565                                         else{
566                                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
567                                                 fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
568                                         }
569                                         
570                                         outputNames.push_back(fileName);
571                                         outputTypes["flow"].push_back(fileName);
572                                         outFlowFileNames[itBar->second][itPrimer->second] = fileName;
573                                         
574                                         ofstream temp;
575                                         m->openOutputFile(fileName, temp);
576                                         temp.close();
577                                 }
578                         }
579                 }
580                 
581                 numFPrimers = primers.size();
582                 numRPrimers = revPrimer.size();
583                 
584         }
585         catch(exception& e) {
586                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
587                 exit(1);
588         }
589 }
590
591 //***************************************************************************************************************
592
593 int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){
594         try {
595                 
596                 string rawSequence = seq.getUnaligned();
597                 int success = bdiffs + 1;       //guilty until proven innocent
598                 
599                 //can you find the barcode
600                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
601                         string oligo = it->first;
602                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
603                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
604                                 break;  
605                         }
606                         
607                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
608                                 group = it->second;
609                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
610                                 
611                                 success = 0;
612                                 break;
613                         }
614                 }
615                 
616                 //if you found the barcode or if you don't want to allow for diffs
617                 if ((bdiffs == 0) || (success == 0)) { return success;  }
618                 
619                 else { //try aligning and see if you can find it
620                         
621                         int maxLength = 0;
622                         
623                         Alignment* alignment;
624                         if (barcodes.size() > 0) {
625                                 map<string,int>::iterator it=barcodes.begin();
626                                 
627                                 for(it;it!=barcodes.end();it++){
628                                         if(it->first.length() > maxLength){
629                                                 maxLength = it->first.length();
630                                         }
631                                 }
632                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
633                                 
634                         }else{ alignment = NULL; } 
635                         
636                         //can you find the barcode
637                         int minDiff = 1e6;
638                         int minCount = 1;
639                         int minGroup = -1;
640                         int minPos = 0;
641                         
642                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
643                                 string oligo = it->first;
644                                 //                              int length = oligo.length();
645                                 
646                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
647                                         success = bdiffs + 10;
648                                         break;
649                                 }
650                                 
651                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
652                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
653                                 oligo = alignment->getSeqAAln();
654                                 string temp = alignment->getSeqBAln();
655                                 
656                                 int alnLength = oligo.length();
657                                 
658                                 for(int i=oligo.length()-1;i>=0;i--){
659                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
660                                 }
661                                 oligo = oligo.substr(0,alnLength);
662                                 temp = temp.substr(0,alnLength);
663                                 
664                                 int numDiff = countDiffs(oligo, temp);
665                                 
666                                 if(numDiff < minDiff){
667                                         minDiff = numDiff;
668                                         minCount = 1;
669                                         minGroup = it->second;
670                                         minPos = 0;
671                                         for(int i=0;i<alnLength;i++){
672                                                 if(temp[i] != '-'){
673                                                         minPos++;
674                                                 }
675                                         }
676                                 }
677                                 else if(numDiff == minDiff){
678                                         minCount++;
679                                 }
680                                 
681                         }
682                         
683                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
684                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
685                         else{                                                                                                   //use the best match
686                                 group = minGroup;
687                                 seq.setUnaligned(rawSequence.substr(minPos));
688                                 success = minDiff;
689                         }
690                         
691                         if (alignment != NULL) {  delete alignment;  }
692                         
693                 }
694                 
695                 return success;
696                 
697         }
698         catch(exception& e) {
699                 m->errorOut(e, "TrimFlowsCommand", "stripBarcode");
700                 exit(1);
701         }
702         
703 }
704
705 //***************************************************************************************************************
706
707 int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
708         try {
709                 
710                 string rawSequence = seq.getUnaligned();
711                 int success = pdiffs + 1;       //guilty until proven innocent
712                 
713                 //can you find the primer
714                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
715                         string oligo = it->first;
716                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
717                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
718                                 break;  
719                         }
720                         
721                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
722                                 group = it->second;
723                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
724                                 success = 0;
725                                 break;
726                         }
727                 }
728                 
729                 //if you found the barcode or if you don't want to allow for diffs
730                 if ((pdiffs == 0) || (success == 0)) { return success;  }
731                 
732                 else { //try aligning and see if you can find it
733                         
734                         int maxLength = 0;
735                         
736                         Alignment* alignment;
737                         if (primers.size() > 0) {
738                                 map<string,int>::iterator it=primers.begin();
739                                 
740                                 for(it;it!=primers.end();it++){
741                                         if(it->first.length() > maxLength){
742                                                 maxLength = it->first.length();
743                                         }
744                                 }
745                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
746                                 
747                         }else{ alignment = NULL; } 
748                         
749                         //can you find the barcode
750                         int minDiff = 1e6;
751                         int minCount = 1;
752                         int minGroup = -1;
753                         int minPos = 0;
754                         
755                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
756                                 string oligo = it->first;
757                                 //                              int length = oligo.length();
758                                 
759                                 if(rawSequence.length() < maxLength){   
760                                         success = pdiffs + 100;
761                                         break;
762                                 }
763                                 
764                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
765                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
766                                 oligo = alignment->getSeqAAln();
767                                 string temp = alignment->getSeqBAln();
768                                 
769                                 int alnLength = oligo.length();
770                                 
771                                 for(int i=oligo.length()-1;i>=0;i--){
772                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
773                                 }
774                                 oligo = oligo.substr(0,alnLength);
775                                 temp = temp.substr(0,alnLength);
776                                 
777                                 int numDiff = countDiffs(oligo, temp);
778                                 
779                                 if(numDiff < minDiff){
780                                         minDiff = numDiff;
781                                         minCount = 1;
782                                         minGroup = it->second;
783                                         minPos = 0;
784                                         for(int i=0;i<alnLength;i++){
785                                                 if(temp[i] != '-'){
786                                                         minPos++;
787                                                 }
788                                         }
789                                 }
790                                 else if(numDiff == minDiff){
791                                         minCount++;
792                                 }
793                                 
794                         }
795                         
796                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
797                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
798                         else{                                                                                                   //use the best match
799                                 group = minGroup;
800                                 seq.setUnaligned(rawSequence.substr(minPos));
801                                 success = minDiff;
802                         }
803                         
804                         if (alignment != NULL) {  delete alignment;  }
805                         
806                 }
807                 
808                 return success;
809                 
810         }
811         catch(exception& e) {
812                 m->errorOut(e, "TrimFlowsCommand", "stripForward");
813                 exit(1);
814         }
815 }
816
817 //***************************************************************************************************************
818
819 bool TrimFlowsCommand::stripReverse(Sequence& seq){
820         try {
821
822                 string rawSequence = seq.getUnaligned();
823                 bool success = 0;       //guilty until proven innocent
824                 
825                 for(int i=0;i<numRPrimers;i++){
826                         string oligo = revPrimer[i];
827                         
828                         if(rawSequence.length() < oligo.length()){
829                                 success = 0;
830                                 break;
831                         }
832                         
833                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
834                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
835                                 success = 1;
836                                 break;
837                         }
838                 }       
839
840                 return success;
841                 
842         }
843         catch(exception& e) {
844                 m->errorOut(e, "TrimFlowsCommand", "stripReverse");
845                 exit(1);
846         }
847 }
848
849
850 //***************************************************************************************************************
851
852 bool TrimFlowsCommand::compareDNASeq(string oligo, string seq){
853         try {
854                 bool success = 1;
855                 int length = oligo.length();
856                 
857                 for(int i=0;i<length;i++){
858                         
859                         if(oligo[i] != seq[i]){
860                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
861                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
862                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
863                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
864                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
865                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
866                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
867                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
868                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
869                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
870                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
871                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
872                                 
873                                 if(success == 0)        {       break;   }
874                         }
875                         else{
876                                 success = 1;
877                         }
878                 }
879                 
880                 return success;
881         }
882         catch(exception& e) {
883                 m->errorOut(e, "TrimFlowsCommand", "compareDNASeq");
884                 exit(1);
885         }
886         
887 }
888
889 //***************************************************************************************************************
890
891 int TrimFlowsCommand::countDiffs(string oligo, string seq){
892         try {
893                 
894                 int length = oligo.length();
895                 int countDiffs = 0;
896                 
897                 for(int i=0;i<length;i++){
898                         
899                         if(oligo[i] != seq[i]){
900                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
901                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
902                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
903                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
904                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
905                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
906                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
907                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
908                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
909                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
910                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
911                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
912                         }
913                         
914                 }
915                 
916                 return countDiffs;
917         }
918         catch(exception& e) {
919                 m->errorOut(e, "TrimFlowsCommand", "countDiffs");
920                 exit(1);
921         }
922         
923 }
924
925 /**************************************************************************************************/
926
927 vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
928
929         try{
930                         
931                 vector<unsigned long int> filePos;
932                 filePos.push_back(0);
933                                         
934                 FILE * pFile;
935                 unsigned long int size;
936                 
937                 //get num bytes in file
938                 pFile = fopen (flowFileName.c_str(),"rb");
939                 if (pFile==NULL) perror ("Error opening file");
940                 else{
941                         fseek (pFile, 0, SEEK_END);
942                         size=ftell (pFile);
943                         fclose (pFile);
944                 }
945                                 
946                 //estimate file breaks
947                 unsigned long int chunkSize = 0;
948                 chunkSize = size / processors;
949
950                 //file too small to divide by processors
951                 if (chunkSize == 0)  {  processors = 1; filePos.push_back(size); return filePos;        }
952                 
953                 //for each process seekg to closest file break and search for next '>' char. make that the filebreak
954                 for (int i = 0; i < processors; i++) {
955                         unsigned long int spot = (i+1) * chunkSize;
956                         
957                         ifstream in;
958                         m->openInputFile(flowFileName, in);
959                         in.seekg(spot);
960                         
961                         string dummy = m->getline(in);
962                         
963                         //there was not another sequence before the end of the file
964                         unsigned long int sanityPos = in.tellg();
965                         
966 //                      if (sanityPos == -1) {  break;  }
967 //                      else {  filePos.push_back(newSpot);  }
968                         if (sanityPos == -1) {  break;  }
969                         else {  filePos.push_back(sanityPos);  }
970                         
971                         in.close();
972                 }
973                 
974                 //save end pos
975                 filePos.push_back(size);
976                 
977                 //sanity check filePos
978                 for (int i = 0; i < (filePos.size()-1); i++) {
979                         if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
980                 }
981
982                 ifstream in;
983                 m->openInputFile(flowFileName, in);
984                 in >> numFlows;
985                 m->gobble(in);
986                 unsigned long int spot = in.tellg();
987                 filePos[0] = spot;
988                 in.close();
989                 
990                 processors = (filePos.size() - 1);
991                 
992                 return filePos; 
993         }
994         catch(exception& e) {
995                 m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks");
996                 exit(1);
997         }
998 }
999
1000 /**************************************************************************************************/
1001
1002 int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
1003
1004         try {
1005 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1006                 int process = 1;
1007                 int exitCommand = 1;
1008                 processIDS.clear();
1009                 
1010                 //loop through and create all the processes you want
1011                 while (process != processors) {
1012                         int pid = fork();
1013                         
1014                         if (pid > 0) {
1015                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1016                                 process++;
1017                         }else if (pid == 0){
1018                                 
1019                                 vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
1020                                 for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
1021                                         for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
1022                                                 tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
1023                                                 ofstream temp;
1024                                                 m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
1025                                                 temp.close();
1026                                                 
1027                                         }
1028                                 }
1029                                                 
1030                                 driverCreateTrim(flowFileName,
1031                                                                  (trimFlowFileName + toString(getpid()) + ".temp"),
1032                                                                  (scrapFlowFileName + toString(getpid()) + ".temp"),
1033                                                                  (fastaFileName + toString(getpid()) + ".temp"),
1034                                                                  tempBarcodePrimerComboFileNames, lines[process]);
1035
1036                                 exit(0);
1037                         }else { 
1038                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1039                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1040                                 exit(0);
1041                         }
1042                 }
1043                 
1044                 //parent do my part
1045                 ofstream temp;
1046                 m->openOutputFile(trimFlowFileName, temp);
1047                 temp.close();
1048
1049                 m->openOutputFile(scrapFlowFileName, temp);
1050                 temp.close();
1051                 
1052                 if(fasta){
1053                         m->openOutputFile(fastaFileName, temp);
1054                         temp.close();
1055                 }
1056                 
1057                 driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
1058
1059                 //force parent to wait until all the processes are done
1060                 for (int i=0;i<processIDS.size();i++) { 
1061                         int temp = processIDS[i];
1062                         wait(&temp);
1063                 }
1064                 
1065                 //append files
1066                 m->mothurOutEndLine();
1067                 for(int i=0;i<processIDS.size();i++){
1068                         
1069                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1070                         
1071                         m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
1072                         remove((trimFlowFileName + toString(processIDS[i]) + ".temp").c_str());
1073 //                      m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
1074
1075                         m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
1076                         remove((scrapFlowFileName + toString(processIDS[i]) + ".temp").c_str());
1077 //                      m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
1078
1079                         if(fasta){
1080                                 m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
1081                                 remove((fastaFileName + toString(processIDS[i]) + ".temp").c_str());
1082 //                              m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
1083                         }
1084                         if(allFiles){                                           
1085                                 for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
1086                                         for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
1087                                                 m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
1088                                                 remove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
1089                                         }
1090                                 }
1091                         }
1092                 }
1093                 
1094                 return exitCommand;
1095 #endif          
1096         }
1097         catch(exception& e) {
1098                 m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");
1099                 exit(1);
1100         }
1101 }
1102
1103 //***************************************************************************************************************