]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added shhh.seqs command
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimSeqsCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
18                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19                                                                         "keepfirst", "removelast",
20                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 TrimSeqsCommand::TrimSeqsCommand(){     
33         try {
34                 abort = true;
35                 //initialize outputTypes
36                 vector<string> tempOutNames;
37                 outputTypes["fasta"] = tempOutNames;
38                 outputTypes["qual"] = tempOutNames;
39                 outputTypes["group"] = tempOutNames;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
43                 exit(1);
44         }
45 }
46
47 //**********************************************************************************************************************
48
49 vector<string> TrimSeqsCommand::getRequiredParameters(){        
50         try {
51                 string Array[] =  {"fasta"};
52                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
57                 exit(1);
58         }
59 }
60
61 //**********************************************************************************************************************
62
63 vector<string> TrimSeqsCommand::getRequiredFiles(){     
64         try {
65                 vector<string> myArray;
66                 return myArray;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
70                 exit(1);
71         }
72 }
73
74 //***************************************************************************************************************
75
76 TrimSeqsCommand::TrimSeqsCommand(string option)  {
77         try {
78                 
79                 abort = false;
80                 comboStarts = 0;
81                 
82                 //allow user to run help
83                 if(option == "help") { help(); abort = true; }
84                 
85                 else {
86                         //valid paramters for this command
87                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
88                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
89                                                                 "keepfirst", "removelast",
90                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
91                         
92                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
93                         
94                         OptionParser parser(option);
95                         map<string,string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string,string>::iterator it;
99                         
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["fasta"] = tempOutNames;
108                         outputTypes["qual"] = tempOutNames;
109                         outputTypes["group"] = tempOutNames;
110                         
111                         //if the user changes the input directory command factory will send this info to us in the output parameter 
112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
113                         if (inputDir == "not found"){   inputDir = "";          }
114                         else {
115                                 string path;
116                                 it = parameters.find("fasta");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
122                                 }
123                                 
124                                 it = parameters.find("oligos");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
130                                 }
131                                 
132                                 it = parameters.find("qfile");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
138                                 }
139                                 
140                                 it = parameters.find("group");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
146                                 }
147                         }
148
149                         
150                         //check for required parameters
151                         fastaFile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
153                         else if (fastaFile == "not open") { abort = true; }     
154                         
155                         //if the user changes the output directory command factory will send this info to us in the output parameter 
156                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
157                                 outputDir = ""; 
158                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
159                         }
160                 
161                         
162                         //check for optional parameter and set defaults
163                         // ...at some point should added some additional type checking...
164                         string temp;
165                         temp = validParameter.validFile(parameters, "flip", false);
166                         if (temp == "not found"){       flip = 0;       }
167                         else if(m->isTrue(temp))        {       flip = 1;       }
168                 
169                         temp = validParameter.validFile(parameters, "oligos", true);
170                         if (temp == "not found"){       oligoFile = "";         }
171                         else if(temp == "not open"){    abort = true;   } 
172                         else                                    {       oligoFile = temp;               }
173                         
174                         temp = validParameter.validFile(parameters, "group", true);
175                         if (temp == "not found"){       groupfile = "";         }
176                         else if(temp == "not open"){    abort = true;   } 
177                         else                                    {       groupfile = temp;               }
178                         
179                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
180                         convert(temp, maxAmbig);  
181
182                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
183                         convert(temp, maxHomoP);  
184
185                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
186                         convert(temp, minLength); 
187                         
188                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
189                         convert(temp, maxLength);
190                         
191                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
192                         convert(temp, bdiffs);
193                         
194                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
195                         convert(temp, pdiffs);
196                         
197                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
198                         convert(temp, tdiffs);
199                         
200                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
201                         
202                         temp = validParameter.validFile(parameters, "qfile", true);     
203                         if (temp == "not found")        {       qFileName = "";         }
204                         else if(temp == "not open")     {       abort = true;           }
205                         else                                            {       qFileName = temp;       }
206                         
207                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
208                         convert(temp, qThreshold);
209                         
210                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
211                         qtrim = m->isTrue(temp);
212
213                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
214                         convert(temp, qRollAverage);
215
216                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
217                         convert(temp, qWindowAverage);
218
219                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
220                         convert(temp, qWindowSize);
221
222                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
223                         convert(temp, qWindowStep);
224
225                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
226                         convert(temp, qAverage);
227
228                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
229                         convert(temp, keepFirst);
230
231                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
232                         convert(temp, removeLast);
233                         
234                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
235                         allFiles = m->isTrue(temp);
236                         
237                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
238                         convert(temp, processors); 
239                         
240                         if ((oligoFile != "") && (groupfile != "")) {
241                                 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
242                         }
243                                                                                                 
244                         
245                         if(allFiles && (oligoFile == "") && (groupfile == "")){
246                                 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file.  Ignoring the allfiles request."); m->mothurOutEndLine();
247                         }
248                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
249                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
250                                 qAverage=0;
251                                 qThreshold=0;
252                         }
253                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
254                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
255                                 abort = true;
256                         }
257                 }
258
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
262                 exit(1);
263         }
264 }
265
266 //**********************************************************************************************************************
267
268 void TrimSeqsCommand::help(){
269         try {
270                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
271                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
272                 m->mothurOut("The fasta parameter is required.\n");
273                 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
274                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
275                 m->mothurOut("The oligos parameter .... The default is "".\n");
276                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
277                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
278                 m->mothurOut("The minlength parameter .... The default is 0.\n");
279                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
280                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
281                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
282                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
283                 m->mothurOut("The qfile parameter .....\n");
284                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
285                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
286                 m->mothurOut("The allfiles parameter .... The default is F.\n");
287                 m->mothurOut("The qtrim parameter .... The default is F.\n");
288                 m->mothurOut("The trim.seqs command should be in the following format: \n");
289                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
290                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
291                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
292                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
293                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
294
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "help");
298                 exit(1);
299         }
300 }
301
302
303 //***************************************************************************************************************
304
305 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
306
307 //***************************************************************************************************************
308
309 int TrimSeqsCommand::execute(){
310         try{
311         
312                 if (abort == true) { return 0; }
313                 
314                 numFPrimers = 0;  //this needs to be initialized
315                 numRPrimers = 0;
316                 vector<string> fastaFileNames;
317                 vector<string> qualFileNames;
318                 
319                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
320                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
321                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
322                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
323                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
324                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
325                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
326                 string groupFile = "";
327                 if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
328                 else{
329                         groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
330                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
331                         groupMap = new GroupMap(groupfile);
332                         groupMap->readMap();
333                         
334                         if(allFiles){
335                                 for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
336                                         groupToIndex[groupMap->namesOfGroups[i]] = i;
337                                         groupVector.push_back(groupMap->namesOfGroups[i]);
338                                         fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) +  groupMap->namesOfGroups[i] + ".fasta"));
339                                         
340                                         //we append later, so we want to clear file
341                                         ofstream outRemove;
342                                         m->openOutputFile(fastaFileNames[i], outRemove);
343                                         outRemove.close();
344                                         if(qFileName != ""){
345                                                 qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) +  groupMap->namesOfGroups[i] + ".qual"));
346                                                 ofstream outRemove2;
347                                                 m->openOutputFile(qualFileNames[i], outRemove2);
348                                                 outRemove2.close();
349                                         }
350                                 }
351                         }
352                         comboStarts = fastaFileNames.size()-1;
353                 }
354                 
355                 if(oligoFile != ""){
356                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
357                         getOligos(fastaFileNames, qualFileNames);
358                 }
359
360                 vector<unsigned long int> fastaFilePos;
361                 vector<unsigned long int> qFilePos;
362                 
363                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
364                 
365                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
366                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
367                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
368                 }       
369                 if(qFileName == "")     {       qLines = lines; } //files with duds
370                 
371                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
372                                 if(processors == 1){
373                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
374                                 }else{
375                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
376                                 }       
377                 #else
378                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
379                 #endif
380                 
381                 if (m->control_pressed) {  return 0; }                  
382                         
383                 set<string> blanks;
384                 for(int i=0;i<fastaFileNames.size();i++){
385                         if (m->isBlank(fastaFileNames[i])) {   blanks.insert(fastaFileNames[i]);        }
386                         else if (filesToRemove.count(fastaFileNames[i]) > 0) {  remove(fastaFileNames[i].c_str()); }
387                         else {
388                                 ifstream inFASTA;
389                                 string seqName;
390                                 m->openInputFile(fastaFileNames[i], inFASTA);
391                                 ofstream outGroups;
392                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
393                                 
394                                 //if the fastafile is on the blanks list then the groups file should be as well
395                                 if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); }
396                                 
397                                 m->openOutputFile(outGroupFilename, outGroups);
398                                 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
399                                 
400                                 string thisGroup = "";
401                                 if (i > comboStarts) {
402                                         map<string, int>::iterator itCombo;
403                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
404                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
405                                         }
406                                 }else{ thisGroup = groupVector[i]; }
407                                         
408                                 while(!inFASTA.eof()){
409                                         if(inFASTA.get() == '>'){
410                                                 inFASTA >> seqName;
411                                                 outGroups << seqName << '\t' << thisGroup << endl;
412                                         }
413                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
414                                 }
415                                 outGroups.close();
416                                 inFASTA.close();
417                         }
418                 }
419                 
420                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
421                 
422                 blanks.clear();
423                 if(qFileName != ""){
424                         for(int i=0;i<qualFileNames.size();i++){
425                                 if (m->isBlank(qualFileNames[i])) {  blanks.insert(qualFileNames[i]);   }
426                                 else if (filesToRemove.count(qualFileNames[i]) > 0) {  remove(qualFileNames[i].c_str()); }
427                         }
428                 }
429                 
430                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
431                 
432                 if (m->control_pressed) { 
433                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
434                         return 0;
435                 }
436
437                 m->mothurOutEndLine();
438                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
439                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
440                 m->mothurOutEndLine();
441                 
442                 return 0;       
443                         
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "TrimSeqsCommand", "execute");
447                 exit(1);
448         }
449 }
450                 
451 /**************************************************************************************/
452
453 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
454                 
455         try {
456                 
457                 ofstream outFASTA;
458                 int able = m->openOutputFile(trimFile, outFASTA);
459                 
460                 ofstream scrapFASTA;
461                 m->openOutputFile(scrapFile, scrapFASTA);
462                 
463                 ofstream outQual;
464                 ofstream scrapQual;
465                 if(qFileName != ""){
466                         m->openOutputFile(trimQFile, outQual);
467                         m->openOutputFile(scrapQFile, scrapQual);
468                 }
469                 
470                 ofstream outGroups;
471                 
472                 if (oligoFile != "") {          
473                         m->openOutputFile(groupFile, outGroups);   
474                 }
475                 
476                 ifstream inFASTA;
477                 m->openInputFile(filename, inFASTA);
478                 inFASTA.seekg(line->start);
479                 
480                 ifstream qFile;
481                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
482                 
483                 
484                 for (int i = 0; i < fastaNames.size(); i++) { //clears old file
485                         ofstream temp;
486                         m->openOutputFile(fastaNames[i], temp);
487                         temp.close();
488                 }
489                 for (int i = 0; i < qualNames.size(); i++) { //clears old file
490                         ofstream temp;
491                         m->openOutputFile(qualNames[i], temp);
492                         temp.close();
493                 }
494                 
495                         
496                 bool done = false;
497                 int count = 0;
498         
499                 while (!done) {
500                                 
501                         if (m->control_pressed) { 
502                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
503                                 if (oligoFile != "") {   outGroups.close();   }
504
505                                 if(qFileName != ""){
506                                         qFile.close();
507                                 }
508                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
509
510                                 return 0;
511                         }
512                         
513                         int success = 1;
514                         
515
516                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
517
518                         QualityScores currQual;
519                         if(qFileName != ""){
520                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
521                         }
522                         
523                         string origSeq = currSeq.getUnaligned();
524                         if (origSeq != "") {
525                                 int groupBar, groupPrime;
526                                 string trashCode = "";
527                                 int currentSeqsDiffs = 0;
528
529                                 if(barcodes.size() != 0){
530                                         success = stripBarcode(currSeq, currQual, groupBar);
531                                         if(success > bdiffs)            {       trashCode += 'b';       }
532                                         else{ currentSeqsDiffs += success;  }
533                                 }
534                                 
535                                 if(numFPrimers != 0){
536                                         success = stripForward(currSeq, currQual, groupPrime);
537                                         if(success > pdiffs)            {       trashCode += 'f';       }
538                                         else{ currentSeqsDiffs += success;  }
539                                 }
540                                 
541                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
542                                 
543                                 if(numRPrimers != 0){
544                                         success = stripReverse(currSeq, currQual);
545                                         if(!success)                            {       trashCode += 'r';       }
546                                 }
547
548                                 if(keepFirst != 0){
549                                         success = keepFirstTrim(currSeq, currQual);
550                                 }
551                                 
552                                 if(removeLast != 0){
553                                         success = removeLastTrim(currSeq, currQual);
554                                         if(!success)                            {       trashCode += 'l';       }
555                                 }
556
557                                 
558                                 if(qFileName != ""){
559                                         
560                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
561                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
562                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
563                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
564                                         else                                            {       success = 1;                            }
565
566 //                                      if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
567 //                                              success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
568 //                                      }
569                                         
570                                         if(!success)                            {       trashCode += 'q';       }
571                                 }                               
572                 
573                                 if(minLength > 0 || maxLength > 0){
574                                         success = cullLength(currSeq);
575                                         if(!success)                            {       trashCode += 'l';       }
576                                 }
577                                 if(maxHomoP > 0){
578                                         success = cullHomoP(currSeq);
579                                         if(!success)                            {       trashCode += 'h';       }
580                                 }
581                                 if(maxAmbig != -1){
582                                         success = cullAmbigs(currSeq);
583                                         if(!success)                            {       trashCode += 'n';       }
584                                 }
585                                 
586                                 if(flip){               // should go last                       
587                                         currSeq.reverseComplement();
588                                         currQual.flipQScores(); 
589                                 }
590                                 
591                                 if(trashCode.length() == 0){
592                                         currSeq.setAligned(currSeq.getUnaligned());
593                                         currSeq.printSequence(outFASTA);
594                                         currQual.printQScores(outQual);
595                                         
596                                         if(barcodes.size() != 0){
597                                                 string thisGroup = groupVector[groupBar];
598                                                 int indexToFastaFile = groupBar;
599                                                 if (primers.size() != 0){
600                                                         //does this primer have a group
601                                                         if (groupVector[groupPrime] != "") {  
602                                                                 thisGroup += "." + groupVector[groupPrime]; 
603                                                                 indexToFastaFile = combos[thisGroup];
604                                                         }
605                                                 }
606                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
607                                                 if(allFiles){
608                                                         ofstream outTemp;
609                                                         m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
610                                                         //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
611                                                         currSeq.printSequence(outTemp);
612                                                         outTemp.close();
613                                                         
614                                                         if(qFileName != ""){
615                                                                 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
616                                                                 ofstream outTemp2;
617                                                                 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
618                                                                 currQual.printQScores(outTemp2);
619                                                                 outTemp2.close();                                                       
620                                                         }
621                                                 }
622                                         }
623                                         
624                                         if (groupfile != "") {
625                                                 string thisGroup = groupMap->getGroup(currSeq.getName());
626                                                 
627                                                 if (thisGroup != "not found") {
628                                                         outGroups << currSeq.getName() << '\t' << thisGroup << endl;
629                                                         if (allFiles) {
630                                                                 ofstream outTemp;
631                                                                 m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
632                                                                 currSeq.printSequence(outTemp);
633                                                                 outTemp.close();
634                                                                 if(qFileName != ""){
635                                                                         ofstream outTemp2;
636                                                                         m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
637                                                                         currQual.printQScores(outTemp2);
638                                                                         outTemp2.close();                                                       
639                                                                 }
640                                                         }
641                                                 }else{
642                                                         m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
643                                                         outGroups << currSeq.getName() << '\t' << "XXX" << endl;
644                                                         if (allFiles) {  
645                                                                 m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
646                                                         }
647                                                 }
648                                         }
649                                 }
650                                 else{
651                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
652                                         currSeq.setUnaligned(origSeq);
653                                         currSeq.setAligned(origSeq);
654                                         currSeq.printSequence(scrapFASTA);
655                                         currQual.printQScores(scrapQual);
656                                 }
657                                 count++;
658                         }
659                         
660                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
661                                 unsigned long int pos = inFASTA.tellg();
662                                 if ((pos == -1) || (pos >= line->end)) { break; }
663                         #else
664                                 if (inFASTA.eof()) { break; }
665                         #endif
666                                 
667                         //report progress
668                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
669                         
670                 }
671                 //report progress
672                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
673
674                 
675                 inFASTA.close();
676                 outFASTA.close();
677                 scrapFASTA.close();
678                 if (oligoFile != "") {   outGroups.close();   }
679                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
680                 
681                 return count;
682         }
683         catch(exception& e) {
684                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
685                 exit(1);
686         }
687 }
688
689 /**************************************************************************************************/
690
691 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
692         try {
693 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
694                 int process = 1;
695                 int exitCommand = 1;
696                 processIDS.clear();
697                 
698                 //loop through and create all the processes you want
699                 while (process != processors) {
700                         int pid = fork();
701                         
702                         if (pid > 0) {
703                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
704                                 process++;
705                         }else if (pid == 0){
706                                 for (int i = 0; i < fastaNames.size(); i++) {
707                                         fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
708                                         //clear old file if it exists
709                                         ofstream temp;
710                                         m->openOutputFile(fastaNames[i], temp);
711                                         temp.close();
712                                         if(qFileName != ""){
713                                                 qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
714                                                 //clear old file if it exists
715                                                 ofstream temp2;
716                                                 m->openOutputFile(qualNames[i], temp2);
717                                                 temp2.close();
718                                         }
719                                 }
720                                 
721                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
722                                 exit(0);
723                         }else { 
724                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
725                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
726                                 exit(0);
727                         }
728                 }
729                 
730                 //parent do my part
731                 for (int i = 0; i < fastaNames.size(); i++) {
732                         //clear old file if it exists
733                         ofstream temp;
734                         m->openOutputFile(fastaNames[i], temp);
735                         temp.close();
736                         if(qFileName != ""){
737                                 //clear old file if it exists
738                                 ofstream temp2;
739                                 m->openOutputFile(qualNames[i], temp2);
740                                 temp2.close();
741                         }
742                 }
743                 
744                 driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]);
745                 
746                 
747                 //force parent to wait until all the processes are done
748                 for (int i=0;i<processIDS.size();i++) { 
749                         int temp = processIDS[i];
750                         wait(&temp);
751                 }
752                 
753                 //append files
754                 for(int i=0;i<processIDS.size();i++){
755                         
756                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
757                         
758                         m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile);
759                         remove((trimFile + toString(processIDS[i]) + ".temp").c_str());
760                         m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile);
761                         remove((scrapFile + toString(processIDS[i]) + ".temp").c_str());
762                         
763                         m->mothurOut("Done with fasta files"); m->mothurOutEndLine();
764                         
765                         if(qFileName != ""){
766                                 m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile);
767                                 remove((trimQFile + toString(processIDS[i]) + ".temp").c_str());
768                                 m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile);
769                                 remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str());
770                         
771                                 m->mothurOut("Done with quality files"); m->mothurOutEndLine();
772                         }
773                         
774                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
775                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
776                         
777                         m->mothurOut("Done with group file"); m->mothurOutEndLine();
778                         
779                         for (int j = 0; j < fastaNames.size(); j++) {
780                                 m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]);
781                                 remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str());
782                         }
783                         
784                         if(qFileName != ""){
785                                 for (int j = 0; j < qualNames.size(); j++) {
786                                         m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]);
787                                         remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str());
788                                 }
789                         }
790                         
791                         if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); }
792                 }
793         
794                 return exitCommand;
795 #endif          
796         }
797         catch(exception& e) {
798                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
799                 exit(1);
800         }
801 }
802
803 /**************************************************************************************************/
804
805 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
806         try {
807                 
808                 //set file positions for fasta file
809                 fastaFilePos = m->divideFile(filename, processors);
810                 
811                 if (qfilename == "") { return processors; }
812                 
813                 //get name of first sequence in each chunk
814                 map<string, int> firstSeqNames;
815                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
816                         ifstream in;
817                         m->openInputFile(filename, in);
818                         in.seekg(fastaFilePos[i]);
819                 
820                         Sequence temp(in); 
821                         firstSeqNames[temp.getName()] = i;
822                 
823                         in.close();
824                 }
825                                 
826                 //seach for filePos of each first name in the qfile and save in qfileFilePos
827                 ifstream inQual;
828                 m->openInputFile(qfilename, inQual);
829                 
830                 string input;
831                 while(!inQual.eof()){   
832                         input = m->getline(inQual);
833
834                         if (input.length() != 0) {
835                                 if(input[0] == '>'){ //this is a sequence name line
836                                         istringstream nameStream(input);
837                                         
838                                         string sname = "";  nameStream >> sname;
839                                         sname = sname.substr(1);
840                                         
841                                         map<string, int>::iterator it = firstSeqNames.find(sname);
842                                         
843                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
844                                                 unsigned long int pos = inQual.tellg(); 
845                                                 qfileFilePos.push_back(pos - input.length() - 1);       
846                                                 firstSeqNames.erase(it);
847                                         }
848                                 }
849                         }
850                         
851                         if (firstSeqNames.size() == 0) { break; }
852                 }
853                 inQual.close();
854                 
855                 
856                 if (firstSeqNames.size() != 0) { 
857                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
858                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
859                         }
860                         qFileName = "";
861                         return processors;
862                 }
863
864                 //get last file position of qfile
865                 FILE * pFile;
866                 unsigned long int size;
867                 
868                 //get num bytes in file
869                 pFile = fopen (qfilename.c_str(),"rb");
870                 if (pFile==NULL) perror ("Error opening file");
871                 else{
872                         fseek (pFile, 0, SEEK_END);
873                         size=ftell (pFile);
874                         fclose (pFile);
875                 }
876                 
877                 qfileFilePos.push_back(size);
878                 
879                 return processors;
880         }
881         catch(exception& e) {
882                 m->errorOut(e, "TrimSeqsCommand", "setLines");
883                 exit(1);
884         }
885 }
886
887 //***************************************************************************************************************
888
889 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
890         try {
891                 ifstream inOligos;
892                 m->openInputFile(oligoFile, inOligos);
893                 
894                 ofstream test;
895                 
896                 string type, oligo, group;
897                 int index=0;
898                 //int indexPrimer = 0;
899                 
900                 while(!inOligos.eof()){
901                         inOligos >> type; m->gobble(inOligos);
902                                         
903                         if(type[0] == '#'){
904                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
905                         }
906                         else{
907                                 //make type case insensitive
908                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
909                                 
910                                 inOligos >> oligo;
911                                 
912                                 for(int i=0;i<oligo.length();i++){
913                                         oligo[i] = toupper(oligo[i]);
914                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
915                                 }
916                                 
917                                 if(type == "FORWARD"){
918                                         group = "";
919                                         
920                                         // get rest of line in case there is a primer name
921                                         while (!inOligos.eof()) {       
922                                                 char c = inOligos.get(); 
923                                                 if (c == 10 || c == 13){        break;  }
924                                                 else if (c == 32 || c == 9){;} //space or tab
925                                                 else {  group += c;  }
926                                         } 
927                                         
928                                         //check for repeat barcodes
929                                         map<string, int>::iterator itPrime = primers.find(oligo);
930                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
931                                         
932                                                 primers[oligo]=index; index++;
933                                                 groupVector.push_back(group);
934                                         
935                                                 if(allFiles){
936                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
937                                                         if(qFileName != ""){
938                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
939                                                         }
940                                                         if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
941                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
942                                                                 if(qFileName != ""){
943                                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
944                                                                 }
945                                                         }else {
946                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
947                                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
948                                                                 if(qFileName != ""){
949                                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
950                                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
951                                                                 }                                                       
952                                                         }
953                                                 }
954                                         
955                                 }
956                                 else if(type == "REVERSE"){
957                                         Sequence oligoRC("reverse", oligo);
958                                         oligoRC.reverseComplement();
959                                         revPrimer.push_back(oligoRC.getUnaligned());
960                                 }
961                                 else if(type == "BARCODE"){
962                                         inOligos >> group;
963                                         
964                                         //check for repeat barcodes
965                                         map<string, int>::iterator itBar = barcodes.find(oligo);
966                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
967                                         
968                                                 barcodes[oligo]=index; index++;
969                                                 groupVector.push_back(group);
970                                                 
971                                                 if(allFiles){
972                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
973                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
974                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
975                                                         if(qFileName != ""){
976                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
977                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
978                                                                 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
979                                                         }                                                       
980                                                 }
981                                         
982                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
983                         }
984                         m->gobble(inOligos);
985                 }
986                 
987                 inOligos.close();
988                 
989                 //add in potential combos
990                 if(allFiles){
991                         comboStarts = outFASTAVec.size()-1;
992                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
993                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
994                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
995                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
996                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
997                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
998                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
999                                                 
1000                                                 if(qFileName != ""){
1001                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1002                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1003                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1004                                                 }
1005                                         }
1006                                 }
1007                         }
1008                 }
1009                 
1010                 numFPrimers = primers.size();
1011                 numRPrimers = revPrimer.size();
1012                 
1013         }
1014         catch(exception& e) {
1015                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1016                 exit(1);
1017         }
1018 }
1019 //***************************************************************************************************************
1020
1021 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1022         try {
1023                 
1024                 string rawSequence = seq.getUnaligned();
1025                 int success = bdiffs + 1;       //guilty until proven innocent
1026                 
1027                 //can you find the barcode
1028                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1029                         string oligo = it->first;
1030                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1031                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1032                                 break;  
1033                         }
1034                         
1035                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1036                                 group = it->second;
1037                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1038                                 
1039                                 if(qual.getName() != ""){
1040                                         qual.trimQScores(oligo.length(), -1);
1041                                 }
1042                                 
1043                                 success = 0;
1044                                 break;
1045                         }
1046                 }
1047                 
1048                 //if you found the barcode or if you don't want to allow for diffs
1049 //              cout << success;
1050                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1051                 
1052                 else { //try aligning and see if you can find it
1053 //                      cout << endl;
1054
1055                         int maxLength = 0;
1056
1057                         Alignment* alignment;
1058                         if (barcodes.size() > 0) {
1059                                 map<string,int>::iterator it=barcodes.begin();
1060
1061                                 for(it;it!=barcodes.end();it++){
1062                                         if(it->first.length() > maxLength){
1063                                                 maxLength = it->first.length();
1064                                         }
1065                                 }
1066                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1067
1068                         }else{ alignment = NULL; } 
1069                         
1070                         //can you find the barcode
1071                         int minDiff = 1e6;
1072                         int minCount = 1;
1073                         int minGroup = -1;
1074                         int minPos = 0;
1075                         
1076                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1077                                 string oligo = it->first;
1078 //                              int length = oligo.length();
1079                                 
1080                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1081                                         success = bdiffs + 10;
1082                                         break;
1083                                 }
1084                                 
1085                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1086                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1087                                 oligo = alignment->getSeqAAln();
1088                                 string temp = alignment->getSeqBAln();
1089                 
1090                                 int alnLength = oligo.length();
1091                                 
1092                                 for(int i=oligo.length()-1;i>=0;i--){
1093                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1094                                 }
1095                                 oligo = oligo.substr(0,alnLength);
1096                                 temp = temp.substr(0,alnLength);
1097                                 
1098                                 int newStart=0;
1099                                 int numDiff = countDiffs(oligo, temp);
1100                                 
1101 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1102                                 
1103                                 if(numDiff < minDiff){
1104                                         minDiff = numDiff;
1105                                         minCount = 1;
1106                                         minGroup = it->second;
1107                                         minPos = 0;
1108                                         for(int i=0;i<alnLength;i++){
1109                                                 if(temp[i] != '-'){
1110                                                         minPos++;
1111                                                 }
1112                                         }
1113                                 }
1114                                 else if(numDiff == minDiff){
1115                                         minCount++;
1116                                 }
1117
1118                         }
1119
1120                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1121                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1122                         else{                                                                                                   //use the best match
1123                                 group = minGroup;
1124                                 seq.setUnaligned(rawSequence.substr(minPos));
1125                                 
1126                                 if(qual.getName() != ""){
1127                                         qual.trimQScores(minPos, -1);
1128                                 }
1129                                 success = minDiff;
1130                         }
1131                         
1132                         if (alignment != NULL) {  delete alignment;  }
1133                         
1134                 }
1135 //              cout << success << endl;
1136                 
1137                 return success;
1138                 
1139         }
1140         catch(exception& e) {
1141                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1142                 exit(1);
1143         }
1144
1145 }
1146
1147 //***************************************************************************************************************
1148
1149 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1150         try {
1151                 string rawSequence = seq.getUnaligned();
1152                 int success = pdiffs + 1;       //guilty until proven innocent
1153                 
1154                 //can you find the primer
1155                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1156                         string oligo = it->first;
1157                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1158                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1159                                 break;  
1160                         }
1161                         
1162                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1163                                 group = it->second;
1164                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1165                                 if(qual.getName() != ""){
1166                                         qual.trimQScores(oligo.length(), -1);
1167                                 }
1168                                 success = 0;
1169                                 break;
1170                         }
1171                 }
1172
1173                 //if you found the barcode or if you don't want to allow for diffs
1174 //              cout << success;
1175                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1176                 
1177                 else { //try aligning and see if you can find it
1178 //                      cout << endl;
1179
1180                         int maxLength = 0;
1181
1182                         Alignment* alignment;
1183                         if (primers.size() > 0) {
1184                                 map<string,int>::iterator it=primers.begin();
1185
1186                                 for(it;it!=primers.end();it++){
1187                                         if(it->first.length() > maxLength){
1188                                                 maxLength = it->first.length();
1189                                         }
1190                                 }
1191                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1192
1193                         }else{ alignment = NULL; } 
1194                         
1195                         //can you find the barcode
1196                         int minDiff = 1e6;
1197                         int minCount = 1;
1198                         int minGroup = -1;
1199                         int minPos = 0;
1200                         
1201                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1202                                 string oligo = it->first;
1203 //                              int length = oligo.length();
1204                                 
1205                                 if(rawSequence.length() < maxLength){   
1206                                         success = pdiffs + 100;
1207                                         break;
1208                                 }
1209                                 
1210                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1211                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1212                                 oligo = alignment->getSeqAAln();
1213                                 string temp = alignment->getSeqBAln();
1214                 
1215                                 int alnLength = oligo.length();
1216                                 
1217                                 for(int i=oligo.length()-1;i>=0;i--){
1218                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1219                                 }
1220                                 oligo = oligo.substr(0,alnLength);
1221                                 temp = temp.substr(0,alnLength);
1222                                 
1223                                 int newStart=0;
1224                                 int numDiff = countDiffs(oligo, temp);
1225                                 
1226 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1227                                 
1228                                 if(numDiff < minDiff){
1229                                         minDiff = numDiff;
1230                                         minCount = 1;
1231                                         minGroup = it->second;
1232                                         minPos = 0;
1233                                         for(int i=0;i<alnLength;i++){
1234                                                 if(temp[i] != '-'){
1235                                                         minPos++;
1236                                                 }
1237                                         }
1238                                 }
1239                                 else if(numDiff == minDiff){
1240                                         minCount++;
1241                                 }
1242
1243                         }
1244
1245                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1246                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1247                         else{                                                                                                   //use the best match
1248                                 group = minGroup;
1249                                 seq.setUnaligned(rawSequence.substr(minPos));
1250                                 if(qual.getName() != ""){
1251                                         qual.trimQScores(minPos, -1);
1252                                 }
1253                                 success = minDiff;
1254                         }
1255                         
1256                         if (alignment != NULL) {  delete alignment;  }
1257                         
1258                 }
1259                 
1260                 return success;
1261
1262         }
1263         catch(exception& e) {
1264                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1265                 exit(1);
1266         }
1267 }
1268
1269 //***************************************************************************************************************
1270
1271 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1272         try {
1273                 string rawSequence = seq.getUnaligned();
1274                 bool success = 0;       //guilty until proven innocent
1275                 
1276                 for(int i=0;i<numRPrimers;i++){
1277                         string oligo = revPrimer[i];
1278                         
1279                         if(rawSequence.length() < oligo.length()){
1280                                 success = 0;
1281                                 break;
1282                         }
1283                         
1284                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1285                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1286                                 if(qual.getName() != ""){
1287                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1288                                 }
1289                                 success = 1;
1290                                 break;
1291                         }
1292                 }       
1293                 return success;
1294                 
1295         }
1296         catch(exception& e) {
1297                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1298                 exit(1);
1299         }
1300 }
1301
1302 //***************************************************************************************************************
1303
1304 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1305         try {
1306                 bool success = 1;
1307                 if(qscores.getName() != ""){
1308                         qscores.trimQScores(-1, keepFirst);
1309                 }
1310                 sequence.trim(keepFirst);
1311                 return success;
1312         }
1313         catch(exception& e) {
1314                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1315                 exit(1);
1316         }
1317         
1318 }       
1319
1320 //***************************************************************************************************************
1321
1322 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1323         try {
1324                 bool success = 0;
1325                 
1326                 int length = sequence.getNumBases() - removeLast;
1327                 
1328                 if(length > 0){
1329                         if(qscores.getName() != ""){
1330                                 qscores.trimQScores(-1, length);
1331                         }
1332                         sequence.trim(length);
1333                         success = 1;
1334                 }
1335                 else{
1336                         success = 0;
1337                 }
1338
1339                 return success;
1340         }
1341         catch(exception& e) {
1342                 m->errorOut(e, "removeLastTrim", "countDiffs");
1343                 exit(1);
1344         }
1345         
1346 }       
1347
1348 //***************************************************************************************************************
1349
1350 bool TrimSeqsCommand::cullLength(Sequence& seq){
1351         try {
1352         
1353                 int length = seq.getNumBases();
1354                 bool success = 0;       //guilty until proven innocent
1355                 
1356                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1357                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1358                 else                                                                                            {       success = 0;    }
1359                 
1360                 return success;
1361         
1362         }
1363         catch(exception& e) {
1364                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1365                 exit(1);
1366         }
1367         
1368 }
1369
1370 //***************************************************************************************************************
1371
1372 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1373         try {
1374                 int longHomoP = seq.getLongHomoPolymer();
1375                 bool success = 0;       //guilty until proven innocent
1376                 
1377                 if(longHomoP <= maxHomoP){      success = 1;    }
1378                 else                                    {       success = 0;    }
1379                 
1380                 return success;
1381         }
1382         catch(exception& e) {
1383                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1384                 exit(1);
1385         }
1386         
1387 }
1388
1389 //***************************************************************************************************************
1390
1391 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1392         try {
1393                 int numNs = seq.getAmbigBases();
1394                 bool success = 0;       //guilty until proven innocent
1395                 
1396                 if(numNs <= maxAmbig)   {       success = 1;    }
1397                 else                                    {       success = 0;    }
1398                 
1399                 return success;
1400         }
1401         catch(exception& e) {
1402                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1403                 exit(1);
1404         }
1405         
1406 }
1407
1408 //***************************************************************************************************************
1409
1410 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1411         try {
1412                 bool success = 1;
1413                 int length = oligo.length();
1414                 
1415                 for(int i=0;i<length;i++){
1416                         
1417                         if(oligo[i] != seq[i]){
1418                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1419                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1420                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1421                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1422                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1423                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1424                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1425                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1426                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1427                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1428                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1429                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1430                                 
1431                                 if(success == 0)        {       break;   }
1432                         }
1433                         else{
1434                                 success = 1;
1435                         }
1436                 }
1437                 
1438                 return success;
1439         }
1440         catch(exception& e) {
1441                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1442                 exit(1);
1443         }
1444
1445 }
1446
1447 //***************************************************************************************************************
1448
1449 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1450         try {
1451
1452                 int length = oligo.length();
1453                 int countDiffs = 0;
1454                 
1455                 for(int i=0;i<length;i++){
1456                                                                 
1457                         if(oligo[i] != seq[i]){
1458                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1459                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1460                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1461                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1462                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1463                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1464                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1465                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1466                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1467                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1468                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1469                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1470                         }
1471                         
1472                 }
1473                 
1474                 return countDiffs;
1475         }
1476         catch(exception& e) {
1477                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1478                 exit(1);
1479         }
1480
1481 }
1482
1483 //***************************************************************************************************************