]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
bug fixes
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimSeqsCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop","minlength", "maxlength", "qfile", 
18                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19                                                                         "keepfirst", "removelast",
20                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 TrimSeqsCommand::TrimSeqsCommand(){     
33         try {
34                 abort = true; calledHelp = true; 
35                 vector<string> tempOutNames;
36                 outputTypes["fasta"] = tempOutNames;
37                 outputTypes["qfile"] = tempOutNames;
38                 outputTypes["group"] = tempOutNames;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
42                 exit(1);
43         }
44 }
45
46 //**********************************************************************************************************************
47
48 vector<string> TrimSeqsCommand::getRequiredParameters(){        
49         try {
50                 string Array[] =  {"fasta"};
51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61
62 vector<string> TrimSeqsCommand::getRequiredFiles(){     
63         try {
64                 vector<string> myArray;
65                 return myArray;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
69                 exit(1);
70         }
71 }
72
73 //***************************************************************************************************************
74
75 TrimSeqsCommand::TrimSeqsCommand(string option)  {
76         try {
77                 
78                 abort = false; calledHelp = false;   
79                 comboStarts = 0;
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 
84                 else {
85                         //valid paramters for this command
86                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
87                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
88                                                                 "keepfirst", "removelast",
89                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
90                         
91                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         
96                         ValidParameters validParameter;
97                         map<string,string>::iterator it;
98                         
99                         //check to make sure all parameters are valid for command
100                         for (it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["fasta"] = tempOutNames;
107                         outputTypes["qfile"] = tempOutNames;
108                         outputTypes["group"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("fasta");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
121                                 }
122                                 
123                                 it = parameters.find("oligos");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
129                                 }
130                                 
131                                 it = parameters.find("qfile");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
137                                 }
138                                 
139                         }
140
141                         
142                         //check for required parameters
143                         fastaFile = validParameter.validFile(parameters, "fasta", true);
144                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
145                         else if (fastaFile == "not open") { abort = true; }     
146                         
147                         //if the user changes the output directory command factory will send this info to us in the output parameter 
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
149                                 outputDir = ""; 
150                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
151                         }
152                 
153                         
154                         //check for optional parameter and set defaults
155                         // ...at some point should added some additional type checking...
156                         string temp;
157                         temp = validParameter.validFile(parameters, "flip", false);
158                         if (temp == "not found"){       flip = 0;       }
159                         else if(m->isTrue(temp))        {       flip = 1;       }
160                 
161                         temp = validParameter.validFile(parameters, "oligos", true);
162                         if (temp == "not found"){       oligoFile = "";         }
163                         else if(temp == "not open"){    abort = true;   } 
164                         else                                    {       oligoFile = temp;               }
165                         
166                         
167                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
168                         convert(temp, maxAmbig);  
169
170                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
171                         convert(temp, maxHomoP);  
172
173                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
174                         convert(temp, minLength); 
175                         
176                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
177                         convert(temp, maxLength);
178                         
179                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
180                         convert(temp, bdiffs);
181                         
182                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
183                         convert(temp, pdiffs);
184                         
185                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
186                         convert(temp, tdiffs);
187                         
188                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
189                         
190                         temp = validParameter.validFile(parameters, "qfile", true);     
191                         if (temp == "not found")        {       qFileName = "";         }
192                         else if(temp == "not open")     {       abort = true;           }
193                         else                                            {       qFileName = temp;       }
194                         
195                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
196                         convert(temp, qThreshold);
197                         
198                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
199                         qtrim = m->isTrue(temp);
200
201                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
202                         convert(temp, qRollAverage);
203
204                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
205                         convert(temp, qWindowAverage);
206
207                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
208                         convert(temp, qWindowSize);
209
210                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
211                         convert(temp, qWindowStep);
212
213                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
214                         convert(temp, qAverage);
215
216                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
217                         convert(temp, keepFirst);
218
219                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
220                         convert(temp, removeLast);
221                         
222                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
223                         allFiles = m->isTrue(temp);
224                         
225                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
226                         convert(temp, processors); 
227                         
228                         
229                         if(allFiles && (oligoFile == "")){
230                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
231                         }
232                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
233                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
234                                 qAverage=0;
235                                 qThreshold=0;
236                         }
237                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
238                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
239                                 abort = true;
240                         }
241                 }
242
243         }
244         catch(exception& e) {
245                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
246                 exit(1);
247         }
248 }
249
250 //**********************************************************************************************************************
251
252 void TrimSeqsCommand::help(){
253         try {
254                 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
255                 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
256                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
257                 m->mothurOut("The fasta parameter is required.\n");
258                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
259                 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
260                 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
261                 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
262                 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
263                 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
264                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
265                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
266                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
267                 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
268                 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
269                 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
270                 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
271                 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
272                 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
273                 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
274                 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
275                 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
276                 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
277                 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
278                 m->mothurOut("The trim.seqs command should be in the following format: \n");
279                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
280                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
281                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
282                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
283                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
284
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "TrimSeqsCommand", "help");
288                 exit(1);
289         }
290 }
291
292
293 //***************************************************************************************************************
294
295 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
296
297 //***************************************************************************************************************
298
299 int TrimSeqsCommand::execute(){
300         try{
301         
302                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
303                 
304                 numFPrimers = 0;  //this needs to be initialized
305                 numRPrimers = 0;
306                 vector<vector<string> > fastaFileNames;
307                 vector<vector<string> > qualFileNames;
308                 
309                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
310                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
311                 
312                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
313                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
314                 
315                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
316                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
317                 if (qFileName != "") {
318                         outputNames.push_back(trimQualFile);
319                         outputNames.push_back(scrapQualFile);
320                         outputTypes["qfile"].push_back(trimQualFile);
321                         outputTypes["qfile"].push_back(scrapQualFile); 
322                 }
323                 
324                 string outputGroupFileName;
325
326                 if(oligoFile != ""){
327                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
328                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
329                         getOligos(fastaFileNames, qualFileNames);
330                 }
331
332                 vector<unsigned long int> fastaFilePos;
333                 vector<unsigned long int> qFilePos;
334                 
335                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
336                 
337                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
338                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
339                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
340                 }       
341                 if(qFileName == "")     {       qLines = lines; } //files with duds
342                 
343                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
344                                 if(processors == 1){
345                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
346                                 }else{
347                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames); 
348                                 }       
349                 #else
350                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
351                 #endif
352                 
353                 if (m->control_pressed) {  return 0; }                  
354                         
355                                 
356                 if(allFiles){
357                         for(int i=0;i<fastaFileNames.size();i++){
358                                 for(int j=0;j<fastaFileNames[0].size();j++){
359                                         if(m->isBlank(fastaFileNames[i][j])){
360                                                 remove(fastaFileNames[i][j].c_str());
361
362                                                 if(qFileName != ""){
363                                                         remove(fastaFileNames[i][j].c_str());
364                                                 }
365
366                                         }
367                                 }
368                         }
369                 }
370                 
371                 
372                 
373                 if (m->control_pressed) { 
374                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
375                         return 0;
376                 }
377
378                 m->mothurOutEndLine();
379                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
380                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
381                 m->mothurOutEndLine();
382                 
383                 return 0;       
384                         
385         }
386         catch(exception& e) {
387                 m->errorOut(e, "TrimSeqsCommand", "execute");
388                 exit(1);
389         }
390 }
391                 
392 /**************************************************************************************/
393
394 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {        
395                 
396         try {
397                 
398                 ofstream trimFASTAFile;
399                 m->openOutputFile(trimFileName, trimFASTAFile);
400                 
401                 ofstream scrapFASTAFile;
402                 m->openOutputFile(scrapFileName, scrapFASTAFile);
403                 
404                 ofstream trimQualFile;
405                 ofstream scrapQualFile;
406                 if(qFileName != ""){
407                         m->openOutputFile(trimQFileName, trimQualFile);
408                         m->openOutputFile(scrapQFileName, scrapQualFile);
409                 }
410                 
411                 ofstream outGroupsFile;
412                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
413                 
414                 if(allFiles){
415                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
416                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
417                                         ofstream temp;
418                                         m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
419                                         if(qFileName != ""){
420                                                 m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
421                                         }
422                                 }
423                         }
424                 }
425                 
426                 ifstream inFASTA;
427                 m->openInputFile(filename, inFASTA);
428                 inFASTA.seekg(line->start);
429                 
430                 ifstream qFile;
431                 if(qFileName != "")     {
432                         m->openInputFile(qFileName, qFile);
433                         qFile.seekg(qline->start);  
434                 }
435                 
436                 int count = 0;
437                 bool moreSeqs = 1;
438         
439                 while (moreSeqs) {
440                                 
441                         if (m->control_pressed) { 
442                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
443                                 if (oligoFile != "") {   outGroupsFile.close();   }
444
445                                 if(qFileName != ""){
446                                         qFile.close();
447                                 }
448                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
449
450                                 return 0;
451                         }
452                         
453                         int success = 1;
454                         string trashCode = "";
455                         int currentSeqsDiffs = 0;
456
457                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
458
459                         QualityScores currQual;
460                         if(qFileName != ""){
461                                 currQual = QualityScores(qFile);  m->gobble(qFile);
462                         }
463
464                         string origSeq = currSeq.getUnaligned();
465                         if (origSeq != "") {
466                                 
467                                 int barcodeIndex = 0;
468                                 int primerIndex = 0;
469                                 
470                                 if(barcodes.size() != 0){
471                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
472                                         if(success > bdiffs)            {       trashCode += 'b';       }
473                                         else{ currentSeqsDiffs += success;  }
474                                 }
475                                 
476                                 if(numFPrimers != 0){
477                                         success = stripForward(currSeq, currQual, primerIndex);
478                                         if(success > pdiffs)            {       trashCode += 'f';       }
479                                         else{ currentSeqsDiffs += success;  }
480                                 }
481                                 
482                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
483                                 
484                                 if(numRPrimers != 0){
485                                         success = stripReverse(currSeq, currQual);
486                                         if(!success)                            {       trashCode += 'r';       }
487                                 }
488
489                                 if(keepFirst != 0){
490                                         success = keepFirstTrim(currSeq, currQual);
491                                 }
492                                 
493                                 if(removeLast != 0){
494                                         success = removeLastTrim(currSeq, currQual);
495                                         if(!success)                            {       trashCode += 'l';       }
496                                 }
497
498                                 
499                                 if(qFileName != ""){
500                                         int origLength = currSeq.getNumBases();
501                                         
502                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
503                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
504                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
505                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
506                                         else                                            {       success = 1;                            }
507                                         
508                                         //you don't want to trim, if it fails above then scrap it
509                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
510                                         
511                                         if(!success)                            {       trashCode += 'q';       }
512                                 }                               
513                 
514                                 if(minLength > 0 || maxLength > 0){
515                                         success = cullLength(currSeq);
516                                         if(!success)                            {       trashCode += 'l';       }
517                                 }
518                                 if(maxHomoP > 0){
519                                         success = cullHomoP(currSeq);
520                                         if(!success)                            {       trashCode += 'h';       }
521                                 }
522                                 if(maxAmbig != -1){
523                                         success = cullAmbigs(currSeq);
524                                         if(!success)                            {       trashCode += 'n';       }
525                                 }
526                                 
527                                 if(flip){               // should go last                       
528                                         currSeq.reverseComplement();
529                                         if(qFileName != ""){
530                                                 currQual.flipQScores(); 
531                                         }
532                                 }
533                                 
534                                 if(trashCode.length() == 0){
535                                         currSeq.setAligned(currSeq.getUnaligned());
536                                         currSeq.printSequence(trimFASTAFile);
537                                         
538                                         if(qFileName != ""){
539                                                 currQual.printQScores(trimQualFile);
540                                         }
541                                         
542                                         if(barcodes.size() != 0){
543                                                 outGroupsFile << currSeq.getName() << '\t' << barcodeNameVector[barcodeIndex] << endl;
544                                         }
545                                         
546                                         
547                                         if(allFiles){
548                                                 ofstream output;
549                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
550                                                 currSeq.printSequence(output);
551                                                 output.close();
552                                                 
553                                                 if(qFileName != ""){
554                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
555                                                         currQual.printQScores(output);
556                                                         output.close();                                                 
557                                                 }
558                                         }
559                                 }
560                                 else{
561                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
562                                         currSeq.setUnaligned(origSeq);
563                                         currSeq.setAligned(origSeq);
564                                         currSeq.printSequence(scrapFASTAFile);
565                                         if(qFileName != ""){
566                                                 currQual.printQScores(scrapQualFile);
567                                         }
568                                 }
569                                 count++;
570                         }
571                         
572                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
573                                 unsigned long int pos = inFASTA.tellg();
574                                 if ((pos == -1) || (pos >= line->end)) { break; }
575                         #else
576                                 if (inFASTA.eof()) { break; }
577                         #endif
578                                 
579                         //report progress
580                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
581                         
582                 }
583                 //report progress
584                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
585
586                 
587                 inFASTA.close();
588                 trimFASTAFile.close();
589                 scrapFASTAFile.close();
590                 if (oligoFile != "") {   outGroupsFile.close();   }
591                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
592                 
593                 return count;
594         }
595         catch(exception& e) {
596                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
597                 exit(1);
598         }
599 }
600
601 /**************************************************************************************************/
602
603 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
604         try {
605 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
606                 int process = 1;
607                 int exitCommand = 1;
608                 processIDS.clear();
609                 
610                 //loop through and create all the processes you want
611                 while (process != processors) {
612                         int pid = fork();
613                         
614                         if (pid > 0) {
615                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
616                                 process++;
617                         }else if (pid == 0){
618                                 
619                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
620                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
621
622                                 if(allFiles){
623                                         ofstream temp;
624
625                                         for(int i=0;i<tempFASTAFileNames.size();i++){
626                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
627                                                         tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
628                                                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
629
630                                                         if(qFileName != ""){
631                                                                 tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
632                                                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
633                                                         }
634                                                 }
635                                         }
636                                 }
637                                                         
638                                 driverCreateTrim(filename,
639                                                                  qFileName,
640                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
641                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
642                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
643                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
644                                                                  (groupFile + toString(getpid()) + ".temp"),
645                                                                  tempFASTAFileNames,
646                                                                  tempPrimerQualFileNames,
647                                                                  lines[process],
648                                                                  qLines[process]);
649                                 
650                                 exit(0);
651                         }else { 
652                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
653                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
654                                 exit(0);
655                         }
656                 }
657                 
658                 //parent do my part
659                 ofstream temp;
660                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
661                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
662                 m->openOutputFile(trimQualFileName, temp);              temp.close();
663                 m->openOutputFile(scrapQualFileName, temp);             temp.close();
664
665                 
666                 
667                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
668                 
669                 
670                 //force parent to wait until all the processes are done
671                 for (int i=0;i<processIDS.size();i++) { 
672                         int temp = processIDS[i];
673                         wait(&temp);
674                 }
675                 
676                 //append files
677                 for(int i=0;i<processIDS.size();i++){
678                         
679                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
680                         
681                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
682                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
683                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
684                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
685                         
686                         if(qFileName != ""){
687                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
688                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
689                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
690                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
691                         }
692                         
693                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
694                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
695                         
696                         
697                         if(allFiles){
698                                 for(int j=0;j<fastaFileNames.size();j++){
699                                         for(int k=0;k<fastaFileNames[j].size();k++){
700                                                 m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
701                                                 remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
702                                                 
703                                                 if(qFileName != ""){
704                                                         m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
705                                                         remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
706                                                 }
707                                         }
708                                 }
709                         }
710                         
711                 }
712         
713                 return exitCommand;
714 #endif          
715         }
716         catch(exception& e) {
717                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
718                 exit(1);
719         }
720 }
721
722 /**************************************************************************************************/
723
724 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
725         try {
726                 
727                 //set file positions for fasta file
728                 fastaFilePos = m->divideFile(filename, processors);
729                 
730                 if (qfilename == "") { return processors; }
731                 
732                 //get name of first sequence in each chunk
733                 map<string, int> firstSeqNames;
734                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
735                         ifstream in;
736                         m->openInputFile(filename, in);
737                         in.seekg(fastaFilePos[i]);
738                 
739                         Sequence temp(in); 
740                         firstSeqNames[temp.getName()] = i;
741                 
742                         in.close();
743                 }
744                                 
745                 //seach for filePos of each first name in the qfile and save in qfileFilePos
746                 ifstream inQual;
747                 m->openInputFile(qfilename, inQual);
748                 
749                 string input;
750                 while(!inQual.eof()){   
751                         input = m->getline(inQual);
752
753                         if (input.length() != 0) {
754                                 if(input[0] == '>'){ //this is a sequence name line
755                                         istringstream nameStream(input);
756                                         
757                                         string sname = "";  nameStream >> sname;
758                                         sname = sname.substr(1);
759                                         
760                                         map<string, int>::iterator it = firstSeqNames.find(sname);
761                                         
762                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
763                                                 unsigned long int pos = inQual.tellg(); 
764                                                 qfileFilePos.push_back(pos - input.length() - 1);       
765                                                 firstSeqNames.erase(it);
766                                         }
767                                 }
768                         }
769                         
770                         if (firstSeqNames.size() == 0) { break; }
771                 }
772                 inQual.close();
773                 
774                 
775                 if (firstSeqNames.size() != 0) { 
776                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
777                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
778                         }
779                         qFileName = "";
780                         return processors;
781                 }
782
783                 //get last file position of qfile
784                 FILE * pFile;
785                 unsigned long int size;
786                 
787                 //get num bytes in file
788                 pFile = fopen (qfilename.c_str(),"rb");
789                 if (pFile==NULL) perror ("Error opening file");
790                 else{
791                         fseek (pFile, 0, SEEK_END);
792                         size=ftell (pFile);
793                         fclose (pFile);
794                 }
795                 
796                 qfileFilePos.push_back(size);
797                 
798                 return processors;
799         }
800         catch(exception& e) {
801                 m->errorOut(e, "TrimSeqsCommand", "setLines");
802                 exit(1);
803         }
804 }
805
806 //***************************************************************************************************************
807
808 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
809         try {
810                 ifstream inOligos;
811                 m->openInputFile(oligoFile, inOligos);
812                 
813                 ofstream test;
814                 
815                 string type, oligo, group;
816
817                 int indexPrimer = 0;
818                 int indexBarcode = 0;
819                 
820                 while(!inOligos.eof()){
821
822                         inOligos >> type; m->gobble(inOligos);
823                                         
824                         if(type[0] == '#'){
825                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
826                         }
827                         else{
828                                 //make type case insensitive
829                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
830                                 
831                                 inOligos >> oligo;
832                                 
833                                 for(int i=0;i<oligo.length();i++){
834                                         oligo[i] = toupper(oligo[i]);
835                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
836                                 }
837                                 
838                                 if(type == "FORWARD"){
839                                         group = "";
840                                         
841                                         // get rest of line in case there is a primer name
842                                         while (!inOligos.eof()) {       
843                                                 char c = inOligos.get(); 
844                                                 if (c == 10 || c == 13){        break;  }
845                                                 else if (c == 32 || c == 9){;} //space or tab
846                                                 else {  group += c;  }
847                                         } 
848                                         
849                                         //check for repeat barcodes
850                                         map<string, int>::iterator itPrime = primers.find(oligo);
851                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
852                                         
853                                         primers[oligo]=indexPrimer; indexPrimer++;              
854                                         primerNameVector.push_back(group);
855                                 }
856                                 else if(type == "REVERSE"){
857                                         Sequence oligoRC("reverse", oligo);
858                                         oligoRC.reverseComplement();
859                                         revPrimer.push_back(oligoRC.getUnaligned());
860                                 }
861                                 else if(type == "BARCODE"){
862                                         inOligos >> group;
863                                         
864                                         //check for repeat barcodes
865                                         map<string, int>::iterator itBar = barcodes.find(oligo);
866                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
867                                                 
868                                         barcodes[oligo]=indexBarcode; indexBarcode++;
869                                         barcodeNameVector.push_back(group);
870                                 }
871                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
872                         }
873                         m->gobble(inOligos);
874                 }       
875                 inOligos.close();
876                 
877                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
878                 
879                 //add in potential combos
880                 if(barcodeNameVector.size() == 0){
881                         barcodes[""] = 0;
882                         barcodeNameVector.push_back("");                        
883                 }
884                 
885                 if(primerNameVector.size() == 0){
886                         primers[""] = 0;
887                         primerNameVector.push_back("");                 
888                 }
889                 
890                 fastaFileNames.resize(barcodeNameVector.size());
891                 for(int i=0;i<fastaFileNames.size();i++){
892                         fastaFileNames[i].assign(primerNameVector.size(), "");
893                 }
894                 if(qFileName != ""){    qualFileNames = fastaFileNames; }
895                 
896                 if(allFiles){
897                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
898                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
899                                         
900                                         string primerName = primerNameVector[itPrimer->second];
901                                         string barcodeName = barcodeNameVector[itBar->second];
902                                         
903                                         string comboGroupName = "";
904                                         string fastaFileName = "";
905                                         string qualFileName = "";
906                                         
907                                         if(primerName == ""){
908                                                 comboGroupName = barcodeNameVector[itBar->second];
909                                         }
910                                         else{
911                                                 if(barcodeName == ""){
912                                                         comboGroupName = primerNameVector[itPrimer->second];
913                                                 }
914                                                 else{
915                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
916                                                 }
917                                         }
918
919                                         ofstream temp;
920                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
921                                         outputNames.push_back(fastaFileName);
922                                         outputTypes["fasta"].push_back(fastaFileName);
923                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
924                                         m->openOutputFile(fastaFileName, temp);         temp.close();
925
926                                         if(qFileName != ""){
927                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
928                                                 outputNames.push_back(qualFileName);
929                                                 outputTypes["qfile"].push_back(qualFileName);
930                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
931                                                 m->openOutputFile(qualFileName, temp);          temp.close();
932                                         }
933                                 }
934                         }
935                 }
936                 numFPrimers = primers.size();
937                 numRPrimers = revPrimer.size();
938
939         }
940         catch(exception& e) {
941                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
942                 exit(1);
943         }
944 }
945
946 //***************************************************************************************************************
947
948 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
949         try {
950                 
951                 string rawSequence = seq.getUnaligned();
952                 int success = bdiffs + 1;       //guilty until proven innocent
953                 
954                 //can you find the barcode
955                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
956                         string oligo = it->first;
957                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
958                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
959                                 break;  
960                         }
961                         
962                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
963                                 group = it->second;
964                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
965                                 
966                                 if(qual.getName() != ""){
967                                         qual.trimQScores(oligo.length(), -1);
968                                 }
969                                 
970                                 success = 0;
971                                 break;
972                         }
973                 }
974                 
975                 //if you found the barcode or if you don't want to allow for diffs
976                 if ((bdiffs == 0) || (success == 0)) { return success;  }
977                 
978                 else { //try aligning and see if you can find it
979
980                         int maxLength = 0;
981
982                         Alignment* alignment;
983                         if (barcodes.size() > 0) {
984                                 map<string,int>::iterator it=barcodes.begin();
985
986                                 for(it;it!=barcodes.end();it++){
987                                         if(it->first.length() > maxLength){
988                                                 maxLength = it->first.length();
989                                         }
990                                 }
991                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
992
993                         }else{ alignment = NULL; } 
994                         
995                         //can you find the barcode
996                         int minDiff = 1e6;
997                         int minCount = 1;
998                         int minGroup = -1;
999                         int minPos = 0;
1000                         
1001                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1002                                 string oligo = it->first;
1003 //                              int length = oligo.length();
1004                                 
1005                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1006                                         success = bdiffs + 10;
1007                                         break;
1008                                 }
1009                                 
1010                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1011                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1012                                 oligo = alignment->getSeqAAln();
1013                                 string temp = alignment->getSeqBAln();
1014                 
1015                                 int alnLength = oligo.length();
1016                                 
1017                                 for(int i=oligo.length()-1;i>=0;i--){
1018                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1019                                 }
1020                                 oligo = oligo.substr(0,alnLength);
1021                                 temp = temp.substr(0,alnLength);
1022                                 
1023                                 int numDiff = countDiffs(oligo, temp);
1024                                 
1025                                 if(numDiff < minDiff){
1026                                         minDiff = numDiff;
1027                                         minCount = 1;
1028                                         minGroup = it->second;
1029                                         minPos = 0;
1030                                         for(int i=0;i<alnLength;i++){
1031                                                 if(temp[i] != '-'){
1032                                                         minPos++;
1033                                                 }
1034                                         }
1035                                 }
1036                                 else if(numDiff == minDiff){
1037                                         minCount++;
1038                                 }
1039
1040                         }
1041
1042                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1043                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1044                         else{                                                                                                   //use the best match
1045                                 group = minGroup;
1046                                 seq.setUnaligned(rawSequence.substr(minPos));
1047                                 
1048                                 if(qual.getName() != ""){
1049                                         qual.trimQScores(minPos, -1);
1050                                 }
1051                                 success = minDiff;
1052                         }
1053                         
1054                         if (alignment != NULL) {  delete alignment;  }
1055                         
1056                 }
1057                 
1058                 return success;
1059                 
1060         }
1061         catch(exception& e) {
1062                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1063                 exit(1);
1064         }
1065
1066 }
1067
1068 //***************************************************************************************************************
1069
1070 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1071         try {
1072                 string rawSequence = seq.getUnaligned();
1073                 int success = pdiffs + 1;       //guilty until proven innocent
1074                 
1075                 //can you find the primer
1076                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1077                         string oligo = it->first;
1078                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1079                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1080                                 break;  
1081                         }
1082                         
1083                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1084                                 group = it->second;
1085                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1086                                 if(qual.getName() != ""){
1087                                         qual.trimQScores(oligo.length(), -1);
1088                                 }
1089                                 success = 0;
1090                                 break;
1091                         }
1092                 }
1093
1094                 //if you found the barcode or if you don't want to allow for diffs
1095                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1096                 
1097                 else { //try aligning and see if you can find it
1098
1099                         int maxLength = 0;
1100
1101                         Alignment* alignment;
1102                         if (primers.size() > 0) {
1103                                 map<string,int>::iterator it=primers.begin();
1104
1105                                 for(it;it!=primers.end();it++){
1106                                         if(it->first.length() > maxLength){
1107                                                 maxLength = it->first.length();
1108                                         }
1109                                 }
1110                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1111
1112                         }else{ alignment = NULL; } 
1113                         
1114                         //can you find the barcode
1115                         int minDiff = 1e6;
1116                         int minCount = 1;
1117                         int minGroup = -1;
1118                         int minPos = 0;
1119                         
1120                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1121                                 string oligo = it->first;
1122 //                              int length = oligo.length();
1123                                 
1124                                 if(rawSequence.length() < maxLength){   
1125                                         success = pdiffs + 100;
1126                                         break;
1127                                 }
1128                                 
1129                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1130                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1131                                 oligo = alignment->getSeqAAln();
1132                                 string temp = alignment->getSeqBAln();
1133                 
1134                                 int alnLength = oligo.length();
1135                                 
1136                                 for(int i=oligo.length()-1;i>=0;i--){
1137                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1138                                 }
1139                                 oligo = oligo.substr(0,alnLength);
1140                                 temp = temp.substr(0,alnLength);
1141                                 
1142                                 int numDiff = countDiffs(oligo, temp);
1143                                 
1144                                 if(numDiff < minDiff){
1145                                         minDiff = numDiff;
1146                                         minCount = 1;
1147                                         minGroup = it->second;
1148                                         minPos = 0;
1149                                         for(int i=0;i<alnLength;i++){
1150                                                 if(temp[i] != '-'){
1151                                                         minPos++;
1152                                                 }
1153                                         }
1154                                 }
1155                                 else if(numDiff == minDiff){
1156                                         minCount++;
1157                                 }
1158
1159                         }
1160
1161                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1162                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1163                         else{                                                                                                   //use the best match
1164                                 group = minGroup;
1165                                 seq.setUnaligned(rawSequence.substr(minPos));
1166                                 if(qual.getName() != ""){
1167                                         qual.trimQScores(minPos, -1);
1168                                 }
1169                                 success = minDiff;
1170                         }
1171                         
1172                         if (alignment != NULL) {  delete alignment;  }
1173                         
1174                 }
1175                 
1176                 return success;
1177
1178         }
1179         catch(exception& e) {
1180                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1181                 exit(1);
1182         }
1183 }
1184
1185 //***************************************************************************************************************
1186
1187 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1188         try {
1189                 string rawSequence = seq.getUnaligned();
1190                 bool success = 0;       //guilty until proven innocent
1191                 
1192                 for(int i=0;i<numRPrimers;i++){
1193                         string oligo = revPrimer[i];
1194                         
1195                         if(rawSequence.length() < oligo.length()){
1196                                 success = 0;
1197                                 break;
1198                         }
1199                         
1200                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1201                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1202                                 if(qual.getName() != ""){
1203                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1204                                 }
1205                                 success = 1;
1206                                 break;
1207                         }
1208                 }       
1209                 return success;
1210                 
1211         }
1212         catch(exception& e) {
1213                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1214                 exit(1);
1215         }
1216 }
1217
1218 //***************************************************************************************************************
1219
1220 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1221         try {
1222                 bool success = 1;
1223                 if(qscores.getName() != ""){
1224                         qscores.trimQScores(-1, keepFirst);
1225                 }
1226                 sequence.trim(keepFirst);
1227                 return success;
1228         }
1229         catch(exception& e) {
1230                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1231                 exit(1);
1232         }
1233         
1234 }       
1235
1236 //***************************************************************************************************************
1237
1238 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1239         try {
1240                 bool success = 0;
1241                 
1242                 int length = sequence.getNumBases() - removeLast;
1243                 
1244                 if(length > 0){
1245                         if(qscores.getName() != ""){
1246                                 qscores.trimQScores(-1, length);
1247                         }
1248                         sequence.trim(length);
1249                         success = 1;
1250                 }
1251                 else{
1252                         success = 0;
1253                 }
1254
1255                 return success;
1256         }
1257         catch(exception& e) {
1258                 m->errorOut(e, "removeLastTrim", "countDiffs");
1259                 exit(1);
1260         }
1261         
1262 }       
1263
1264 //***************************************************************************************************************
1265
1266 bool TrimSeqsCommand::cullLength(Sequence& seq){
1267         try {
1268         
1269                 int length = seq.getNumBases();
1270                 bool success = 0;       //guilty until proven innocent
1271                 
1272                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1273                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1274                 else                                                                                            {       success = 0;    }
1275                 
1276                 return success;
1277         
1278         }
1279         catch(exception& e) {
1280                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1281                 exit(1);
1282         }
1283         
1284 }
1285
1286 //***************************************************************************************************************
1287
1288 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1289         try {
1290                 int longHomoP = seq.getLongHomoPolymer();
1291                 bool success = 0;       //guilty until proven innocent
1292                 
1293                 if(longHomoP <= maxHomoP){      success = 1;    }
1294                 else                                    {       success = 0;    }
1295                 
1296                 return success;
1297         }
1298         catch(exception& e) {
1299                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1300                 exit(1);
1301         }
1302         
1303 }
1304
1305 //***************************************************************************************************************
1306
1307 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1308         try {
1309                 int numNs = seq.getAmbigBases();
1310                 bool success = 0;       //guilty until proven innocent
1311                 
1312                 if(numNs <= maxAmbig)   {       success = 1;    }
1313                 else                                    {       success = 0;    }
1314                 
1315                 return success;
1316         }
1317         catch(exception& e) {
1318                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1319                 exit(1);
1320         }
1321         
1322 }
1323
1324 //***************************************************************************************************************
1325
1326 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1327         try {
1328                 bool success = 1;
1329                 int length = oligo.length();
1330                 
1331                 for(int i=0;i<length;i++){
1332                         
1333                         if(oligo[i] != seq[i]){
1334                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1335                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1336                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1337                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1338                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1339                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1340                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1341                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1342                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1343                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1344                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1345                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1346                                 
1347                                 if(success == 0)        {       break;   }
1348                         }
1349                         else{
1350                                 success = 1;
1351                         }
1352                 }
1353                 
1354                 return success;
1355         }
1356         catch(exception& e) {
1357                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1358                 exit(1);
1359         }
1360
1361 }
1362
1363 //***************************************************************************************************************
1364
1365 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1366         try {
1367
1368                 int length = oligo.length();
1369                 int countDiffs = 0;
1370                 
1371                 for(int i=0;i<length;i++){
1372                                                                 
1373                         if(oligo[i] != seq[i]){
1374                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1375                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1376                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1377                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1378                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1379                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1380                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1381                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1382                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1383                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1384                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1385                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1386                         }
1387                         
1388                 }
1389                 
1390                 return countDiffs;
1391         }
1392         catch(exception& e) {
1393                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1394                 exit(1);
1395         }
1396
1397 }
1398
1399 //***************************************************************************************************************