]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
mods to trim.seqs
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimSeqsCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
18                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19                                                                         "keepfirst", "removelast",
20                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 TrimSeqsCommand::TrimSeqsCommand(){     
33         try {
34                 abort = true; calledHelp = true; 
35                 vector<string> tempOutNames;
36                 outputTypes["fasta"] = tempOutNames;
37                 outputTypes["qual"] = tempOutNames;
38                 outputTypes["group"] = tempOutNames;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
42                 exit(1);
43         }
44 }
45
46 //**********************************************************************************************************************
47
48 vector<string> TrimSeqsCommand::getRequiredParameters(){        
49         try {
50                 string Array[] =  {"fasta"};
51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61
62 vector<string> TrimSeqsCommand::getRequiredFiles(){     
63         try {
64                 vector<string> myArray;
65                 return myArray;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
69                 exit(1);
70         }
71 }
72
73 //***************************************************************************************************************
74
75 TrimSeqsCommand::TrimSeqsCommand(string option)  {
76         try {
77                 
78                 abort = false; calledHelp = false;   
79                 comboStarts = 0;
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 
84                 else {
85                         //valid paramters for this command
86                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
87                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
88                                                                 "keepfirst", "removelast",
89                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
90                         
91                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         
96                         ValidParameters validParameter;
97                         map<string,string>::iterator it;
98                         
99                         //check to make sure all parameters are valid for command
100                         for (it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["fasta"] = tempOutNames;
107                         outputTypes["qual"] = tempOutNames;
108                         outputTypes["group"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("fasta");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
121                                 }
122                                 
123                                 it = parameters.find("oligos");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
129                                 }
130                                 
131                                 it = parameters.find("qfile");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
137                                 }
138                                 
139                                 it = parameters.find("group");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
145                                 }
146                         }
147
148                         
149                         //check for required parameters
150                         fastaFile = validParameter.validFile(parameters, "fasta", true);
151                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
152                         else if (fastaFile == "not open") { abort = true; }     
153                         
154                         //if the user changes the output directory command factory will send this info to us in the output parameter 
155                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
156                                 outputDir = ""; 
157                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
158                         }
159                 
160                         
161                         //check for optional parameter and set defaults
162                         // ...at some point should added some additional type checking...
163                         string temp;
164                         temp = validParameter.validFile(parameters, "flip", false);
165                         if (temp == "not found"){       flip = 0;       }
166                         else if(m->isTrue(temp))        {       flip = 1;       }
167                 
168                         temp = validParameter.validFile(parameters, "oligos", true);
169                         if (temp == "not found"){       oligoFile = "";         }
170                         else if(temp == "not open"){    abort = true;   } 
171                         else                                    {       oligoFile = temp;               }
172                         
173                         temp = validParameter.validFile(parameters, "group", true);
174                         if (temp == "not found"){       groupfile = "";         }
175                         else if(temp == "not open"){    abort = true;   } 
176                         else                                    {       groupfile = temp;               }
177                         
178                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
179                         convert(temp, maxAmbig);  
180
181                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
182                         convert(temp, maxHomoP);  
183
184                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
185                         convert(temp, minLength); 
186                         
187                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
188                         convert(temp, maxLength);
189                         
190                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
191                         convert(temp, bdiffs);
192                         
193                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
194                         convert(temp, pdiffs);
195                         
196                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
197                         convert(temp, tdiffs);
198                         
199                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
200                         
201                         temp = validParameter.validFile(parameters, "qfile", true);     
202                         if (temp == "not found")        {       qFileName = "";         }
203                         else if(temp == "not open")     {       abort = true;           }
204                         else                                            {       qFileName = temp;       }
205                         
206                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
207                         convert(temp, qThreshold);
208                         
209                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
210                         qtrim = m->isTrue(temp);
211
212                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
213                         convert(temp, qRollAverage);
214
215                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
216                         convert(temp, qWindowAverage);
217
218                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
219                         convert(temp, qWindowSize);
220
221                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
222                         convert(temp, qWindowStep);
223
224                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
225                         convert(temp, qAverage);
226
227                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
228                         convert(temp, keepFirst);
229
230                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
231                         convert(temp, removeLast);
232                         
233                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
234                         allFiles = m->isTrue(temp);
235                         
236                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
237                         convert(temp, processors); 
238                         
239                         if ((oligoFile != "") && (groupfile != "")) {
240                                 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
241                         }
242                                                                                                 
243                         
244                         if(allFiles && (oligoFile == "") && (groupfile == "")){
245                                 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file.  Ignoring the allfiles request."); m->mothurOutEndLine();
246                         }
247                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
248                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
249                                 qAverage=0;
250                                 qThreshold=0;
251                         }
252                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
253                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
254                                 abort = true;
255                         }
256                 }
257
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
261                 exit(1);
262         }
263 }
264
265 //**********************************************************************************************************************
266
267 void TrimSeqsCommand::help(){
268         try {
269                 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
270                 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
271                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
272                 m->mothurOut("The fasta parameter is required.\n");
273                 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
274                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
275                 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
276                 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
277                 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
278                 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
279                 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
280                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
281                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
282                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
283                 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
284                 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
285                 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
286                 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
287                 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
288                 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
289                 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
290                 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
291                 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
292                 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
293                 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
294                 m->mothurOut("The trim.seqs command should be in the following format: \n");
295                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
296                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
297                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
298                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
299                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
300
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "TrimSeqsCommand", "help");
304                 exit(1);
305         }
306 }
307
308
309 //***************************************************************************************************************
310
311 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
312
313 //***************************************************************************************************************
314
315 int TrimSeqsCommand::execute(){
316         try{
317         
318                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
319                 
320                 numFPrimers = 0;  //this needs to be initialized
321                 numRPrimers = 0;
322                 vector<vector<string> > fastaFileNames;
323                 vector<vector<string> > qualFileNames;
324                 
325                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
326                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
327                 
328                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
329                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
330                 
331                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
332                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
333                 if (qFileName != "") {
334                         outputNames.push_back(trimQualFile);
335                         outputNames.push_back(scrapQualFile);
336                         outputTypes["qual"].push_back(trimQualFile);
337                         outputTypes["qual"].push_back(scrapQualFile); 
338                 }
339                 
340                 string outputGroupFileName;
341
342                 if(oligoFile != ""){
343                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
344                         outputNames.push_back(outputGroupFileName); outputTypes["groups"].push_back(outputGroupFileName);
345                         getOligos(fastaFileNames, qualFileNames);
346                 }
347
348                 vector<unsigned long int> fastaFilePos;
349                 vector<unsigned long int> qFilePos;
350                 
351                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
352                 
353                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
354                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
355                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
356                 }       
357                 if(qFileName == "")     {       qLines = lines; } //files with duds
358                 
359                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
360                                 if(processors == 1){
361                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
362                                 }else{
363                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames); 
364                                 }       
365                 #else
366                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
367                 #endif
368                 
369                 if (m->control_pressed) {  return 0; }                  
370                         
371                                 
372                 if(allFiles){
373                         for(int i=0;i<fastaFileNames.size();i++){
374                                 for(int j=0;j<fastaFileNames[0].size();j++){
375                                         if(m->isBlank(fastaFileNames[i][j])){
376                                                 remove(fastaFileNames[i][j].c_str());
377
378                                                 if(qFileName != ""){
379                                                         remove(fastaFileNames[i][j].c_str());
380                                                 }
381
382                                         }
383                                 }
384                         }
385                 }
386                 
387                 
388                 
389                 if (m->control_pressed) { 
390                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
391                         return 0;
392                 }
393
394                 m->mothurOutEndLine();
395                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
396                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
397                 m->mothurOutEndLine();
398                 
399                 return 0;       
400                         
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "TrimSeqsCommand", "execute");
404                 exit(1);
405         }
406 }
407                 
408 /**************************************************************************************/
409
410 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {        
411                 
412         try {
413                 
414                 ofstream trimFASTAFile;
415                 m->openOutputFile(trimFileName, trimFASTAFile);
416                 
417                 ofstream scrapFASTAFile;
418                 m->openOutputFile(scrapFileName, scrapFASTAFile);
419                 
420                 ofstream trimQualFile;
421                 ofstream scrapQualFile;
422                 if(qFileName != ""){
423                         m->openOutputFile(trimQFileName, trimQualFile);
424                         m->openOutputFile(scrapQFileName, scrapQualFile);
425                 }
426                 
427                 ofstream outGroupsFile;
428                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
429                 
430                 if(allFiles){
431                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
432                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
433                                         ofstream temp;
434                                         m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
435                                         if(qFileName != ""){
436                                                 m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
437                                         }
438                                 }
439                         }
440                 }
441                 
442                 ifstream inFASTA;
443                 m->openInputFile(filename, inFASTA);
444                 inFASTA.seekg(line->start);
445                 
446                 ifstream qFile;
447                 if(qFileName != "")     {
448                         m->openInputFile(qFileName, qFile);
449                         qFile.seekg(qline->start);  
450                 }
451                 
452                 int count = 0;
453                 bool moreSeqs = 1;
454         
455                 while (moreSeqs) {
456                                 
457                         if (m->control_pressed) { 
458                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
459                                 if (oligoFile != "") {   outGroupsFile.close();   }
460
461                                 if(qFileName != ""){
462                                         qFile.close();
463                                 }
464                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
465
466                                 return 0;
467                         }
468                         
469                         int success = 1;
470                         string trashCode = "";
471                         int currentSeqsDiffs = 0;
472
473                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
474
475                         QualityScores currQual;
476                         if(qFileName != ""){
477                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
478                         }
479
480                         string origSeq = currSeq.getUnaligned();
481                         if (origSeq != "") {
482                                 
483                                 int barcodeIndex = 0;
484                                 int primerIndex = 0;
485                                 
486                                 if(barcodes.size() != 0){
487                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
488                                         if(success > bdiffs)            {       trashCode += 'b';       }
489                                         else{ currentSeqsDiffs += success;  }
490                                 }
491                                 
492                                 if(numFPrimers != 0){
493                                         success = stripForward(currSeq, currQual, primerIndex);
494                                         if(success > pdiffs)            {       trashCode += 'f';       }
495                                         else{ currentSeqsDiffs += success;  }
496                                 }
497                                 
498                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
499                                 
500                                 if(numRPrimers != 0){
501                                         success = stripReverse(currSeq, currQual);
502                                         if(!success)                            {       trashCode += 'r';       }
503                                 }
504
505                                 if(keepFirst != 0){
506                                         success = keepFirstTrim(currSeq, currQual);
507                                 }
508                                 
509                                 if(removeLast != 0){
510                                         success = removeLastTrim(currSeq, currQual);
511                                         if(!success)                            {       trashCode += 'l';       }
512                                 }
513
514                                 
515                                 if(qFileName != ""){
516                                         int origLength = currSeq.getNumBases();
517                                         
518                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
519                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
520                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
521                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
522                                         else                                            {       success = 1;                            }
523                                         
524                                         //you don't want to trim, if it fails above then scrap it
525                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
526                                         
527                                         if(!success)                            {       trashCode += 'q';       }
528                                 }                               
529                 
530                                 if(minLength > 0 || maxLength > 0){
531                                         success = cullLength(currSeq);
532                                         if(!success)                            {       trashCode += 'l';       }
533                                 }
534                                 if(maxHomoP > 0){
535                                         success = cullHomoP(currSeq);
536                                         if(!success)                            {       trashCode += 'h';       }
537                                 }
538                                 if(maxAmbig != -1){
539                                         success = cullAmbigs(currSeq);
540                                         if(!success)                            {       trashCode += 'n';       }
541                                 }
542                                 
543                                 if(flip){               // should go last                       
544                                         currSeq.reverseComplement();
545                                         if(qFileName != ""){
546                                                 currQual.flipQScores(); 
547                                         }
548                                 }
549                                 
550                                 if(trashCode.length() == 0){
551                                         currSeq.setAligned(currSeq.getUnaligned());
552                                         currSeq.printSequence(trimFASTAFile);
553                                         
554                                         if(qFileName != ""){
555                                                 currQual.printQScores(trimQualFile);
556                                         }
557                                         
558                                         if(barcodes.size() != 0){
559                                                 outGroupsFile << currSeq.getName() << '\t' << barcodeNameVector[barcodeIndex] << endl;
560                                         }
561                                         
562                                         
563                                         if(allFiles){
564                                                 ofstream output;
565                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
566                                                 currSeq.printSequence(output);
567                                                 output.close();
568                                                 
569                                                 if(qFileName != ""){
570                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
571                                                         currQual.printQScores(output);
572                                                         output.close();                                                 
573                                                 }
574                                         }
575                                 }
576                                 else{
577                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
578                                         currSeq.setUnaligned(origSeq);
579                                         currSeq.setAligned(origSeq);
580                                         currSeq.printSequence(scrapFASTAFile);
581                                         if(qFileName != ""){
582                                                 currQual.printQScores(scrapQualFile);
583                                         }
584                                 }
585                                 count++;
586                         }
587                         
588                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589                                 unsigned long int pos = inFASTA.tellg();
590                                 if ((pos == -1) || (pos >= line->end)) { break; }
591                         #else
592                                 if (inFASTA.eof()) { break; }
593                         #endif
594                                 
595                         //report progress
596                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
597                         
598                 }
599                 //report progress
600                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
601
602                 
603                 inFASTA.close();
604                 trimFASTAFile.close();
605                 scrapFASTAFile.close();
606                 if (oligoFile != "") {   outGroupsFile.close();   }
607                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
608                 
609                 return count;
610         }
611         catch(exception& e) {
612                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
613                 exit(1);
614         }
615 }
616
617 /**************************************************************************************************/
618
619 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
620         try {
621 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
622                 int process = 1;
623                 int exitCommand = 1;
624                 processIDS.clear();
625                 
626                 //loop through and create all the processes you want
627                 while (process != processors) {
628                         int pid = fork();
629                         
630                         if (pid > 0) {
631                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
632                                 process++;
633                         }else if (pid == 0){
634                                 
635                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
636                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
637
638                                 if(allFiles){
639                                         ofstream temp;
640
641                                         for(int i=0;i<tempFASTAFileNames.size();i++){
642                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
643                                                         tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
644                                                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
645
646                                                         if(qFileName != ""){
647                                                                 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
648                                                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
649                                                         }
650                                                 }
651                                         }
652                                 }
653                                                         
654                                 driverCreateTrim(filename,
655                                                                  qFileName,
656                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
657                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
658                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
659                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
660                                                                  (groupFile + toString(getpid()) + ".temp"),
661                                                                  tempFASTAFileNames,
662                                                                  tempPrimerQualFileNames,
663                                                                  lines[process],
664                                                                  qLines[process]);
665                                 
666                                 exit(0);
667                         }else { 
668                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
669                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
670                                 exit(0);
671                         }
672                 }
673                 
674                 //parent do my part
675                 ofstream temp;
676                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
677                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
678                 m->openOutputFile(trimQualFileName, temp);              temp.close();
679                 m->openOutputFile(scrapQualFileName, temp);             temp.close();
680
681                 
682                 
683                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
684                 
685                 
686                 //force parent to wait until all the processes are done
687                 for (int i=0;i<processIDS.size();i++) { 
688                         int temp = processIDS[i];
689                         wait(&temp);
690                 }
691                 
692                 //append files
693                 for(int i=0;i<processIDS.size();i++){
694                         
695                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
696                         
697                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
698                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
699                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
700                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
701                         
702                         if(qFileName != ""){
703                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
704                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
705                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
706                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
707                         }
708                         
709                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
710                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
711                         
712                         
713                         if(allFiles){
714                                 for(int j=0;j<fastaFileNames.size();j++){
715                                         for(int k=0;k<fastaFileNames[j].size();k++){
716                                                 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
717                                                 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
718                                                 
719                                                 if(qFileName != ""){
720                                                         m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
721                                                         remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
722                                                 }
723                                         }
724                                 }
725                         }
726                         
727                 }
728         
729                 return exitCommand;
730 #endif          
731         }
732         catch(exception& e) {
733                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
734                 exit(1);
735         }
736 }
737
738 /**************************************************************************************************/
739
740 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
741         try {
742                 
743                 //set file positions for fasta file
744                 fastaFilePos = m->divideFile(filename, processors);
745                 
746                 if (qfilename == "") { return processors; }
747                 
748                 //get name of first sequence in each chunk
749                 map<string, int> firstSeqNames;
750                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
751                         ifstream in;
752                         m->openInputFile(filename, in);
753                         in.seekg(fastaFilePos[i]);
754                 
755                         Sequence temp(in); 
756                         firstSeqNames[temp.getName()] = i;
757                 
758                         in.close();
759                 }
760                                 
761                 //seach for filePos of each first name in the qfile and save in qfileFilePos
762                 ifstream inQual;
763                 m->openInputFile(qfilename, inQual);
764                 
765                 string input;
766                 while(!inQual.eof()){   
767                         input = m->getline(inQual);
768
769                         if (input.length() != 0) {
770                                 if(input[0] == '>'){ //this is a sequence name line
771                                         istringstream nameStream(input);
772                                         
773                                         string sname = "";  nameStream >> sname;
774                                         sname = sname.substr(1);
775                                         
776                                         map<string, int>::iterator it = firstSeqNames.find(sname);
777                                         
778                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
779                                                 unsigned long int pos = inQual.tellg(); 
780                                                 qfileFilePos.push_back(pos - input.length() - 1);       
781                                                 firstSeqNames.erase(it);
782                                         }
783                                 }
784                         }
785                         
786                         if (firstSeqNames.size() == 0) { break; }
787                 }
788                 inQual.close();
789                 
790                 
791                 if (firstSeqNames.size() != 0) { 
792                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
793                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
794                         }
795                         qFileName = "";
796                         return processors;
797                 }
798
799                 //get last file position of qfile
800                 FILE * pFile;
801                 unsigned long int size;
802                 
803                 //get num bytes in file
804                 pFile = fopen (qfilename.c_str(),"rb");
805                 if (pFile==NULL) perror ("Error opening file");
806                 else{
807                         fseek (pFile, 0, SEEK_END);
808                         size=ftell (pFile);
809                         fclose (pFile);
810                 }
811                 
812                 qfileFilePos.push_back(size);
813                 
814                 return processors;
815         }
816         catch(exception& e) {
817                 m->errorOut(e, "TrimSeqsCommand", "setLines");
818                 exit(1);
819         }
820 }
821
822 //***************************************************************************************************************
823
824 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
825         try {
826                 ifstream inOligos;
827                 m->openInputFile(oligoFile, inOligos);
828                 
829                 ofstream test;
830                 
831                 string type, oligo, group;
832
833                 int indexPrimer = 0;
834                 int indexBarcode = 0;
835                 
836                 while(!inOligos.eof()){
837
838                         inOligos >> type; m->gobble(inOligos);
839                                         
840                         if(type[0] == '#'){
841                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
842                         }
843                         else{
844                                 //make type case insensitive
845                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
846                                 
847                                 inOligos >> oligo;
848                                 
849                                 for(int i=0;i<oligo.length();i++){
850                                         oligo[i] = toupper(oligo[i]);
851                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
852                                 }
853                                 
854                                 if(type == "FORWARD"){
855                                         group = "";
856                                         
857                                         // get rest of line in case there is a primer name
858                                         while (!inOligos.eof()) {       
859                                                 char c = inOligos.get(); 
860                                                 if (c == 10 || c == 13){        break;  }
861                                                 else if (c == 32 || c == 9){;} //space or tab
862                                                 else {  group += c;  }
863                                         } 
864                                         
865                                         //check for repeat barcodes
866                                         map<string, int>::iterator itPrime = primers.find(oligo);
867                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
868                                         
869                                         primers[oligo]=indexPrimer; indexPrimer++;              
870                                         primerNameVector.push_back(group);
871                                 }
872                                 else if(type == "REVERSE"){
873                                         Sequence oligoRC("reverse", oligo);
874                                         oligoRC.reverseComplement();
875                                         revPrimer.push_back(oligoRC.getUnaligned());
876                                 }
877                                 else if(type == "BARCODE"){
878                                         inOligos >> group;
879                                         
880                                         //check for repeat barcodes
881                                         map<string, int>::iterator itBar = barcodes.find(oligo);
882                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
883                                                 
884                                         barcodes[oligo]=indexBarcode; indexBarcode++;
885                                         barcodeNameVector.push_back(group);
886                                 }
887                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
888                         }
889                         m->gobble(inOligos);
890                 }       
891                 inOligos.close();
892                 
893                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
894                 
895                 //add in potential combos
896                 if(barcodeNameVector.size() == 0){
897                         barcodes[""] = 0;
898                         barcodeNameVector.push_back("");                        
899                 }
900                 
901                 if(primerNameVector.size() == 0){
902                         primers[""] = 0;
903                         primerNameVector.push_back("");                 
904                 }
905                 
906                 fastaFileNames.resize(barcodeNameVector.size());
907                 for(int i=0;i<fastaFileNames.size();i++){
908                         fastaFileNames[i].assign(primerNameVector.size(), "");
909                 }
910                 if(qFileName != ""){    qualFileNames = fastaFileNames; }
911                 
912                 if(allFiles){
913                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
914                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
915                                         
916                                         string primerName = primerNameVector[itPrimer->second];
917                                         string barcodeName = barcodeNameVector[itBar->second];
918                                         
919                                         string comboGroupName = "";
920                                         string fastaFileName = "";
921                                         string qualFileName = "";
922                                         
923                                         if(primerName == ""){
924                                                 comboGroupName = barcodeNameVector[itBar->second];
925                                         }
926                                         else{
927                                                 if(barcodeName == ""){
928                                                         comboGroupName = primerNameVector[itPrimer->second];
929                                                 }
930                                                 else{
931                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
932                                                 }
933                                         }
934
935                                         ofstream temp;
936                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
937                                         outputNames.push_back(fastaFileName);
938                                         outputTypes["fasta"].push_back(fastaFileName);
939                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
940                                         m->openOutputFile(fastaFileName, temp);         temp.close();
941
942                                         if(qFileName != ""){
943                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
944                                                 outputNames.push_back(qualFileName);
945                                                 outputTypes["qual"].push_back(qualFileName);
946                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
947                                                 m->openOutputFile(qualFileName, temp);          temp.close();
948                                         }
949                                 }
950                         }
951                 }
952                 numFPrimers = primers.size();
953                 numRPrimers = revPrimer.size();
954
955         }
956         catch(exception& e) {
957                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
958                 exit(1);
959         }
960 }
961
962 //***************************************************************************************************************
963
964 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
965         try {
966                 
967                 string rawSequence = seq.getUnaligned();
968                 int success = bdiffs + 1;       //guilty until proven innocent
969                 
970                 //can you find the barcode
971                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
972                         string oligo = it->first;
973                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
974                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
975                                 break;  
976                         }
977                         
978                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
979                                 group = it->second;
980                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
981                                 
982                                 if(qual.getName() != ""){
983                                         qual.trimQScores(oligo.length(), -1);
984                                 }
985                                 
986                                 success = 0;
987                                 break;
988                         }
989                 }
990                 
991                 //if you found the barcode or if you don't want to allow for diffs
992                 if ((bdiffs == 0) || (success == 0)) { return success;  }
993                 
994                 else { //try aligning and see if you can find it
995
996                         int maxLength = 0;
997
998                         Alignment* alignment;
999                         if (barcodes.size() > 0) {
1000                                 map<string,int>::iterator it=barcodes.begin();
1001
1002                                 for(it;it!=barcodes.end();it++){
1003                                         if(it->first.length() > maxLength){
1004                                                 maxLength = it->first.length();
1005                                         }
1006                                 }
1007                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1008
1009                         }else{ alignment = NULL; } 
1010                         
1011                         //can you find the barcode
1012                         int minDiff = 1e6;
1013                         int minCount = 1;
1014                         int minGroup = -1;
1015                         int minPos = 0;
1016                         
1017                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1018                                 string oligo = it->first;
1019 //                              int length = oligo.length();
1020                                 
1021                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1022                                         success = bdiffs + 10;
1023                                         break;
1024                                 }
1025                                 
1026                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1027                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1028                                 oligo = alignment->getSeqAAln();
1029                                 string temp = alignment->getSeqBAln();
1030                 
1031                                 int alnLength = oligo.length();
1032                                 
1033                                 for(int i=oligo.length()-1;i>=0;i--){
1034                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1035                                 }
1036                                 oligo = oligo.substr(0,alnLength);
1037                                 temp = temp.substr(0,alnLength);
1038                                 
1039                                 int numDiff = countDiffs(oligo, temp);
1040                                 
1041                                 if(numDiff < minDiff){
1042                                         minDiff = numDiff;
1043                                         minCount = 1;
1044                                         minGroup = it->second;
1045                                         minPos = 0;
1046                                         for(int i=0;i<alnLength;i++){
1047                                                 if(temp[i] != '-'){
1048                                                         minPos++;
1049                                                 }
1050                                         }
1051                                 }
1052                                 else if(numDiff == minDiff){
1053                                         minCount++;
1054                                 }
1055
1056                         }
1057
1058                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1059                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1060                         else{                                                                                                   //use the best match
1061                                 group = minGroup;
1062                                 seq.setUnaligned(rawSequence.substr(minPos));
1063                                 
1064                                 if(qual.getName() != ""){
1065                                         qual.trimQScores(minPos, -1);
1066                                 }
1067                                 success = minDiff;
1068                         }
1069                         
1070                         if (alignment != NULL) {  delete alignment;  }
1071                         
1072                 }
1073                 
1074                 return success;
1075                 
1076         }
1077         catch(exception& e) {
1078                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1079                 exit(1);
1080         }
1081
1082 }
1083
1084 //***************************************************************************************************************
1085
1086 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1087         try {
1088                 string rawSequence = seq.getUnaligned();
1089                 int success = pdiffs + 1;       //guilty until proven innocent
1090                 
1091                 //can you find the primer
1092                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1093                         string oligo = it->first;
1094                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1095                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1096                                 break;  
1097                         }
1098                         
1099                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1100                                 group = it->second;
1101                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1102                                 if(qual.getName() != ""){
1103                                         qual.trimQScores(oligo.length(), -1);
1104                                 }
1105                                 success = 0;
1106                                 break;
1107                         }
1108                 }
1109
1110                 //if you found the barcode or if you don't want to allow for diffs
1111                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1112                 
1113                 else { //try aligning and see if you can find it
1114
1115                         int maxLength = 0;
1116
1117                         Alignment* alignment;
1118                         if (primers.size() > 0) {
1119                                 map<string,int>::iterator it=primers.begin();
1120
1121                                 for(it;it!=primers.end();it++){
1122                                         if(it->first.length() > maxLength){
1123                                                 maxLength = it->first.length();
1124                                         }
1125                                 }
1126                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1127
1128                         }else{ alignment = NULL; } 
1129                         
1130                         //can you find the barcode
1131                         int minDiff = 1e6;
1132                         int minCount = 1;
1133                         int minGroup = -1;
1134                         int minPos = 0;
1135                         
1136                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1137                                 string oligo = it->first;
1138 //                              int length = oligo.length();
1139                                 
1140                                 if(rawSequence.length() < maxLength){   
1141                                         success = pdiffs + 100;
1142                                         break;
1143                                 }
1144                                 
1145                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1146                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1147                                 oligo = alignment->getSeqAAln();
1148                                 string temp = alignment->getSeqBAln();
1149                 
1150                                 int alnLength = oligo.length();
1151                                 
1152                                 for(int i=oligo.length()-1;i>=0;i--){
1153                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1154                                 }
1155                                 oligo = oligo.substr(0,alnLength);
1156                                 temp = temp.substr(0,alnLength);
1157                                 
1158                                 int numDiff = countDiffs(oligo, temp);
1159                                 
1160                                 if(numDiff < minDiff){
1161                                         minDiff = numDiff;
1162                                         minCount = 1;
1163                                         minGroup = it->second;
1164                                         minPos = 0;
1165                                         for(int i=0;i<alnLength;i++){
1166                                                 if(temp[i] != '-'){
1167                                                         minPos++;
1168                                                 }
1169                                         }
1170                                 }
1171                                 else if(numDiff == minDiff){
1172                                         minCount++;
1173                                 }
1174
1175                         }
1176
1177                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1178                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1179                         else{                                                                                                   //use the best match
1180                                 group = minGroup;
1181                                 seq.setUnaligned(rawSequence.substr(minPos));
1182                                 if(qual.getName() != ""){
1183                                         qual.trimQScores(minPos, -1);
1184                                 }
1185                                 success = minDiff;
1186                         }
1187                         
1188                         if (alignment != NULL) {  delete alignment;  }
1189                         
1190                 }
1191                 
1192                 return success;
1193
1194         }
1195         catch(exception& e) {
1196                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1197                 exit(1);
1198         }
1199 }
1200
1201 //***************************************************************************************************************
1202
1203 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1204         try {
1205                 string rawSequence = seq.getUnaligned();
1206                 bool success = 0;       //guilty until proven innocent
1207                 
1208                 for(int i=0;i<numRPrimers;i++){
1209                         string oligo = revPrimer[i];
1210                         
1211                         if(rawSequence.length() < oligo.length()){
1212                                 success = 0;
1213                                 break;
1214                         }
1215                         
1216                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1217                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1218                                 if(qual.getName() != ""){
1219                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1220                                 }
1221                                 success = 1;
1222                                 break;
1223                         }
1224                 }       
1225                 return success;
1226                 
1227         }
1228         catch(exception& e) {
1229                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1230                 exit(1);
1231         }
1232 }
1233
1234 //***************************************************************************************************************
1235
1236 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1237         try {
1238                 bool success = 1;
1239                 if(qscores.getName() != ""){
1240                         qscores.trimQScores(-1, keepFirst);
1241                 }
1242                 sequence.trim(keepFirst);
1243                 return success;
1244         }
1245         catch(exception& e) {
1246                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1247                 exit(1);
1248         }
1249         
1250 }       
1251
1252 //***************************************************************************************************************
1253
1254 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1255         try {
1256                 bool success = 0;
1257                 
1258                 int length = sequence.getNumBases() - removeLast;
1259                 
1260                 if(length > 0){
1261                         if(qscores.getName() != ""){
1262                                 qscores.trimQScores(-1, length);
1263                         }
1264                         sequence.trim(length);
1265                         success = 1;
1266                 }
1267                 else{
1268                         success = 0;
1269                 }
1270
1271                 return success;
1272         }
1273         catch(exception& e) {
1274                 m->errorOut(e, "removeLastTrim", "countDiffs");
1275                 exit(1);
1276         }
1277         
1278 }       
1279
1280 //***************************************************************************************************************
1281
1282 bool TrimSeqsCommand::cullLength(Sequence& seq){
1283         try {
1284         
1285                 int length = seq.getNumBases();
1286                 bool success = 0;       //guilty until proven innocent
1287                 
1288                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1289                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1290                 else                                                                                            {       success = 0;    }
1291                 
1292                 return success;
1293         
1294         }
1295         catch(exception& e) {
1296                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1297                 exit(1);
1298         }
1299         
1300 }
1301
1302 //***************************************************************************************************************
1303
1304 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1305         try {
1306                 int longHomoP = seq.getLongHomoPolymer();
1307                 bool success = 0;       //guilty until proven innocent
1308                 
1309                 if(longHomoP <= maxHomoP){      success = 1;    }
1310                 else                                    {       success = 0;    }
1311                 
1312                 return success;
1313         }
1314         catch(exception& e) {
1315                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1316                 exit(1);
1317         }
1318         
1319 }
1320
1321 //***************************************************************************************************************
1322
1323 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1324         try {
1325                 int numNs = seq.getAmbigBases();
1326                 bool success = 0;       //guilty until proven innocent
1327                 
1328                 if(numNs <= maxAmbig)   {       success = 1;    }
1329                 else                                    {       success = 0;    }
1330                 
1331                 return success;
1332         }
1333         catch(exception& e) {
1334                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1335                 exit(1);
1336         }
1337         
1338 }
1339
1340 //***************************************************************************************************************
1341
1342 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1343         try {
1344                 bool success = 1;
1345                 int length = oligo.length();
1346                 
1347                 for(int i=0;i<length;i++){
1348                         
1349                         if(oligo[i] != seq[i]){
1350                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1351                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1352                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1353                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1354                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1355                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1356                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1357                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1358                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1359                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1360                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1361                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1362                                 
1363                                 if(success == 0)        {       break;   }
1364                         }
1365                         else{
1366                                 success = 1;
1367                         }
1368                 }
1369                 
1370                 return success;
1371         }
1372         catch(exception& e) {
1373                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1374                 exit(1);
1375         }
1376
1377 }
1378
1379 //***************************************************************************************************************
1380
1381 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1382         try {
1383
1384                 int length = oligo.length();
1385                 int countDiffs = 0;
1386                 
1387                 for(int i=0;i<length;i++){
1388                                                                 
1389                         if(oligo[i] != seq[i]){
1390                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1391                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1392                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1393                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1394                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1395                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1396                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1397                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1398                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1399                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1400                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1401                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1402                         }
1403                         
1404                 }
1405                 
1406                 return countDiffs;
1407         }
1408         catch(exception& e) {
1409                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1410                 exit(1);
1411         }
1412
1413 }
1414
1415 //***************************************************************************************************************