]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added set.current and get.current commands and modified existing commands to update...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimSeqsCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop","minlength", "maxlength", "qfile", 
18                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19                                                                         "keepfirst", "removelast",
20                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 TrimSeqsCommand::TrimSeqsCommand(){     
33         try {
34                 abort = true; calledHelp = true; 
35                 vector<string> tempOutNames;
36                 outputTypes["fasta"] = tempOutNames;
37                 outputTypes["qfile"] = tempOutNames;
38                 outputTypes["group"] = tempOutNames;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
42                 exit(1);
43         }
44 }
45
46 //**********************************************************************************************************************
47
48 vector<string> TrimSeqsCommand::getRequiredParameters(){        
49         try {
50                 string Array[] =  {"fasta"};
51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61
62 vector<string> TrimSeqsCommand::getRequiredFiles(){     
63         try {
64                 vector<string> myArray;
65                 return myArray;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
69                 exit(1);
70         }
71 }
72
73 //***************************************************************************************************************
74
75 TrimSeqsCommand::TrimSeqsCommand(string option)  {
76         try {
77                 
78                 abort = false; calledHelp = false;   
79                 comboStarts = 0;
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 
84                 else {
85                         //valid paramters for this command
86                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
87                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
88                                                                 "keepfirst", "removelast",
89                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
90                         
91                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         
96                         ValidParameters validParameter;
97                         map<string,string>::iterator it;
98                         
99                         //check to make sure all parameters are valid for command
100                         for (it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["fasta"] = tempOutNames;
107                         outputTypes["qfile"] = tempOutNames;
108                         outputTypes["group"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("fasta");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
121                                 }
122                                 
123                                 it = parameters.find("oligos");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
129                                 }
130                                 
131                                 it = parameters.find("qfile");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
137                                 }
138                                 
139                         }
140
141                         
142                         //check for required parameters
143                         fastaFile = validParameter.validFile(parameters, "fasta", true);
144                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
145                         else if (fastaFile == "not open") { abort = true; }     
146                         
147                         //if the user changes the output directory command factory will send this info to us in the output parameter 
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
149                                 outputDir = ""; 
150                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
151                         }
152                 
153                         
154                         //check for optional parameter and set defaults
155                         // ...at some point should added some additional type checking...
156                         string temp;
157                         temp = validParameter.validFile(parameters, "flip", false);
158                         if (temp == "not found"){       flip = 0;       }
159                         else if(m->isTrue(temp))        {       flip = 1;       }
160                 
161                         temp = validParameter.validFile(parameters, "oligos", true);
162                         if (temp == "not found"){       oligoFile = "";         }
163                         else if(temp == "not open"){    abort = true;   } 
164                         else                                    {       oligoFile = temp;               }
165                         
166                         
167                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
168                         convert(temp, maxAmbig);  
169
170                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
171                         convert(temp, maxHomoP);  
172
173                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
174                         convert(temp, minLength); 
175                         
176                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
177                         convert(temp, maxLength);
178                         
179                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
180                         convert(temp, bdiffs);
181                         
182                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
183                         convert(temp, pdiffs);
184                         
185                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
186                         convert(temp, tdiffs);
187                         
188                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
189                         
190                         temp = validParameter.validFile(parameters, "qfile", true);     
191                         if (temp == "not found")        {       qFileName = "";         }
192                         else if(temp == "not open")     {       abort = true;           }
193                         else                                            {       qFileName = temp;       }
194                         
195                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
196                         convert(temp, qThreshold);
197                         
198                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
199                         qtrim = m->isTrue(temp);
200
201                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
202                         convert(temp, qRollAverage);
203
204                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
205                         convert(temp, qWindowAverage);
206
207                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
208                         convert(temp, qWindowSize);
209
210                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
211                         convert(temp, qWindowStep);
212
213                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
214                         convert(temp, qAverage);
215
216                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
217                         convert(temp, keepFirst);
218
219                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
220                         convert(temp, removeLast);
221                         
222                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
223                         allFiles = m->isTrue(temp);
224                         
225                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
226                         convert(temp, processors); 
227                         
228                         
229                         if(allFiles && (oligoFile == "")){
230                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
231                         }
232                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
233                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
234                                 qAverage=0;
235                                 qThreshold=0;
236                         }
237                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
238                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
239                                 abort = true;
240                         }
241                 }
242
243         }
244         catch(exception& e) {
245                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
246                 exit(1);
247         }
248 }
249
250 //**********************************************************************************************************************
251
252 void TrimSeqsCommand::help(){
253         try {
254                 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
255                 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
256                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
257                 m->mothurOut("The fasta parameter is required.\n");
258                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
259                 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
260                 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
261                 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
262                 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
263                 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
264                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
265                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
266                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
267                 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
268                 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
269                 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
270                 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
271                 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
272                 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
273                 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
274                 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
275                 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
276                 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
277                 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
278                 m->mothurOut("The trim.seqs command should be in the following format: \n");
279                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
280                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
281                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
282                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
283                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
284
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "TrimSeqsCommand", "help");
288                 exit(1);
289         }
290 }
291
292
293 //***************************************************************************************************************
294
295 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
296
297 //***************************************************************************************************************
298
299 int TrimSeqsCommand::execute(){
300         try{
301         
302                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
303                 
304                 numFPrimers = 0;  //this needs to be initialized
305                 numRPrimers = 0;
306                 vector<vector<string> > fastaFileNames;
307                 vector<vector<string> > qualFileNames;
308                 
309                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
310                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
311                 
312                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
313                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
314                 
315                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
316                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
317                 if (qFileName != "") {
318                         outputNames.push_back(trimQualFile);
319                         outputNames.push_back(scrapQualFile);
320                         outputTypes["qfile"].push_back(trimQualFile);
321                         outputTypes["qfile"].push_back(scrapQualFile); 
322                 }
323                 
324                 string outputGroupFileName;
325                 if(oligoFile != ""){
326                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
327                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
328                         getOligos(fastaFileNames, qualFileNames);
329                 }
330
331                 vector<unsigned long int> fastaFilePos;
332                 vector<unsigned long int> qFilePos;
333                 
334                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
335                 
336                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
337                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
338                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
339                 }       
340                 if(qFileName == "")     {       qLines = lines; } //files with duds
341                 
342                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
343                                 if(processors == 1){
344                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
345                                 }else{
346                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames); 
347                                 }       
348                 #else
349                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
350                 #endif
351                 
352                 if (m->control_pressed) {  return 0; }                  
353                         
354                 if(allFiles){
355                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
356                         map<string, string>::iterator it;
357                         set<string> namesToRemove;
358                         for(int i=0;i<fastaFileNames.size();i++){
359                                 for(int j=0;j<fastaFileNames[0].size();j++){
360                                         if (fastaFileNames[i][j] != "") {
361                                                 if(m->isBlank(fastaFileNames[i][j])){
362                                                         remove(fastaFileNames[i][j].c_str());
363                                                         namesToRemove.insert(fastaFileNames[i][j]);
364                                                         
365                                                         if(qFileName != ""){
366                                                                 remove(qualFileNames[i][j].c_str());
367                                                                 namesToRemove.insert(qualFileNames[i][j]);
368                                                         }
369                                                 }else{  
370                                                         it = uniqueFastaNames.find(fastaFileNames[i][j]);
371                                                         if (it == uniqueFastaNames.end()) {     
372                                                                 uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
373                                                         }       
374                                                 }
375                                         }
376                                 }
377                         }
378                         
379                         //remove names for outputFileNames, just cleans up the output
380                         vector<string> outputNames2;
381                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
382                         outputNames = outputNames2;
383                         
384                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
385                                 ifstream in;
386                                 m->openInputFile(it->first, in);
387                                 
388                                 ofstream out;
389                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
390                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
391                                 m->openOutputFile(thisGroupName, out);
392                                 
393                                 while (!in.eof()){
394                                         if (m->control_pressed) { break; }
395                                         
396                                         Sequence currSeq(in); m->gobble(in);
397                                         out << currSeq.getName() << '\t' << it->second << endl;
398                                 }
399                                 in.close();
400                                 out.close();
401                         }
402                 }
403                 
404                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
405
406                 //output group counts
407                 m->mothurOutEndLine();
408                 //int total = 0;
409 //              for (int i = 0; i < barcodeNameVector.size(); i++) {
410 //                      if ((barcodeNameVector[i] != "") && (groupCounts[i] != 0)) { total += groupCounts[i]; m->mothurOut("Group " + barcodeNameVector[i] + " contains " + toString(groupCounts[i]) + " sequences."); m->mothurOutEndLine(); }
411 //              }
412 //              if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
413                 
414                         if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
415
416                 //set fasta file as new current fastafile
417                 string current = "";
418                 itTypes = outputTypes.find("fasta");
419                 if (itTypes != outputTypes.end()) {
420                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
421                 }
422                 
423                 itTypes = outputTypes.find("qfile");
424                 if (itTypes != outputTypes.end()) {
425                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
426                 }
427                 
428                 itTypes = outputTypes.find("group");
429                 if (itTypes != outputTypes.end()) {
430                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
431                 }
432
433                 m->mothurOutEndLine();
434                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
435                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
436                 m->mothurOutEndLine();
437                 
438                 return 0;       
439                         
440         }
441         catch(exception& e) {
442                 m->errorOut(e, "TrimSeqsCommand", "execute");
443                 exit(1);
444         }
445 }
446                 
447 /**************************************************************************************/
448
449 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {        
450                 
451         try {
452                 
453                 ofstream trimFASTAFile;
454                 m->openOutputFile(trimFileName, trimFASTAFile);
455                 
456                 ofstream scrapFASTAFile;
457                 m->openOutputFile(scrapFileName, scrapFASTAFile);
458                 
459                 ofstream trimQualFile;
460                 ofstream scrapQualFile;
461                 if(qFileName != ""){
462                         m->openOutputFile(trimQFileName, trimQualFile);
463                         m->openOutputFile(scrapQFileName, scrapQualFile);
464                 }
465                 
466                 ofstream outGroupsFile;
467                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
468                 if(allFiles){
469                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
470                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
471                                         if (fastaFileNames[i][j] != "") {
472                                                 ofstream temp;
473                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
474                                                 if(qFileName != ""){
475                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
476                                                 }
477                                         }
478                                 }
479                         }
480                 }
481                 
482                 ifstream inFASTA;
483                 m->openInputFile(filename, inFASTA);
484                 inFASTA.seekg(line->start);
485                 
486                 ifstream qFile;
487                 if(qFileName != "")     {
488                         m->openInputFile(qFileName, qFile);
489                         qFile.seekg(qline->start);  
490                 }
491                 
492                 int count = 0;
493                 bool moreSeqs = 1;
494         
495                 while (moreSeqs) {
496                                 
497                         if (m->control_pressed) { 
498                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
499                                 if (oligoFile != "") {   outGroupsFile.close();   }
500
501                                 if(qFileName != ""){
502                                         qFile.close();
503                                 }
504                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
505
506                                 return 0;
507                         }
508                         
509                         int success = 1;
510                         string trashCode = "";
511                         int currentSeqsDiffs = 0;
512
513                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
514
515                         QualityScores currQual;
516                         if(qFileName != ""){
517                                 currQual = QualityScores(qFile);  m->gobble(qFile);
518                         }
519
520                         string origSeq = currSeq.getUnaligned();
521                         if (origSeq != "") {
522                                 
523                                 int barcodeIndex = 0;
524                                 int primerIndex = 0;
525                                 
526                                 if(barcodes.size() != 0){
527                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
528                                         if(success > bdiffs)            {       trashCode += 'b';       }
529                                         else{ currentSeqsDiffs += success;  }
530                                 }
531                                 
532                                 if(numFPrimers != 0){
533                                         success = stripForward(currSeq, currQual, primerIndex);
534                                         if(success > pdiffs)            {       trashCode += 'f';       }
535                                         else{ currentSeqsDiffs += success;  }
536                                 }
537                                 
538                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
539                                 
540                                 if(numRPrimers != 0){
541                                         success = stripReverse(currSeq, currQual);
542                                         if(!success)                            {       trashCode += 'r';       }
543                                 }
544
545                                 if(keepFirst != 0){
546                                         success = keepFirstTrim(currSeq, currQual);
547                                 }
548                                 
549                                 if(removeLast != 0){
550                                         success = removeLastTrim(currSeq, currQual);
551                                         if(!success)                            {       trashCode += 'l';       }
552                                 }
553
554                                 
555                                 if(qFileName != ""){
556                                         int origLength = currSeq.getNumBases();
557                                         
558                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
559                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
560                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
561                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
562                                         else                                            {       success = 1;                            }
563                                         
564                                         //you don't want to trim, if it fails above then scrap it
565                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
566                                         
567                                         if(!success)                            {       trashCode += 'q';       }
568                                 }                               
569                 
570                                 if(minLength > 0 || maxLength > 0){
571                                         success = cullLength(currSeq);
572                                         if(!success)                            {       trashCode += 'l';       }
573                                 }
574                                 if(maxHomoP > 0){
575                                         success = cullHomoP(currSeq);
576                                         if(!success)                            {       trashCode += 'h';       }
577                                 }
578                                 if(maxAmbig != -1){
579                                         success = cullAmbigs(currSeq);
580                                         if(!success)                            {       trashCode += 'n';       }
581                                 }
582                                 
583                                 if(flip){               // should go last                       
584                                         currSeq.reverseComplement();
585                                         if(qFileName != ""){
586                                                 currQual.flipQScores(); 
587                                         }
588                                 }
589                                 
590                                 if(trashCode.length() == 0){
591                                         currSeq.setAligned(currSeq.getUnaligned());
592                                         currSeq.printSequence(trimFASTAFile);
593                                         
594                                         if(qFileName != ""){
595                                                 currQual.printQScores(trimQualFile);
596                                         }
597                                         
598                                         if(barcodes.size() != 0){
599                                                 outGroupsFile << currSeq.getName() << '\t' << barcodeNameVector[barcodeIndex] << endl;
600                                                 groupCounts[barcodeIndex]++;
601                                         }
602                                         
603                                         
604                                         if(allFiles){
605                                                 ofstream output;
606                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
607                                                 currSeq.printSequence(output);
608                                                 output.close();
609                                                 
610                                                 if(qFileName != ""){
611                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
612                                                         currQual.printQScores(output);
613                                                         output.close();                                                 
614                                                 }
615                                         }
616                                 }
617                                 else{
618                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
619                                         currSeq.setUnaligned(origSeq);
620                                         currSeq.setAligned(origSeq);
621                                         currSeq.printSequence(scrapFASTAFile);
622                                         if(qFileName != ""){
623                                                 currQual.printQScores(scrapQualFile);
624                                         }
625                                 }
626                                 count++;
627                         }
628                         
629                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
630                                 unsigned long int pos = inFASTA.tellg();
631                                 if ((pos == -1) || (pos >= line->end)) { break; }
632                         #else
633                                 if (inFASTA.eof()) { break; }
634                         #endif
635                                 
636                         //report progress
637                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
638                         
639                 }
640                 //report progress
641                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
642
643                 
644                 inFASTA.close();
645                 trimFASTAFile.close();
646                 scrapFASTAFile.close();
647                 if (oligoFile != "") {   outGroupsFile.close();   }
648                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
649                 
650                 return count;
651         }
652         catch(exception& e) {
653                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
654                 exit(1);
655         }
656 }
657
658 /**************************************************************************************************/
659
660 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
661         try {
662 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
663                 int process = 1;
664                 int exitCommand = 1;
665                 processIDS.clear();
666                 
667                 //loop through and create all the processes you want
668                 while (process != processors) {
669                         int pid = fork();
670                         
671                         if (pid > 0) {
672                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
673                                 process++;
674                         }else if (pid == 0){
675                                 
676                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
677                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
678
679                                 if(allFiles){
680                                         ofstream temp;
681
682                                         for(int i=0;i<tempFASTAFileNames.size();i++){
683                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
684                                                         if (tempFASTAFileNames[i][j] != "") {
685                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
686                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
687
688                                                                 if(qFileName != ""){
689                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
690                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
691                                                                 }
692                                                         }
693                                                 }
694                                         }
695                                 }
696                                                         
697                                 driverCreateTrim(filename,
698                                                                  qFileName,
699                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
700                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
701                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
702                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
703                                                                  (groupFile + toString(getpid()) + ".temp"),
704                                                                  tempFASTAFileNames,
705                                                                  tempPrimerQualFileNames,
706                                                                  lines[process],
707                                                                  qLines[process]);
708                                 
709                                 //pass groupCounts to parent
710                                 ofstream out;
711                                 string tempFile = filename + toString(getpid()) + ".num.temp";
712                                 m->openOutputFile(tempFile, out);
713                                 for(int i = 0; i < groupCounts.size(); i++) {
714                                         out << groupCounts[i] << endl;
715                                 }
716                                 out.close();
717                                 
718                                 exit(0);
719                         }else { 
720                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
721                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
722                                 exit(0);
723                         }
724                 }
725                 
726                 //parent do my part
727                 ofstream temp;
728                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
729                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
730                 m->openOutputFile(trimQualFileName, temp);              temp.close();
731                 m->openOutputFile(scrapQualFileName, temp);             temp.close();
732
733                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
734                 
735                 //force parent to wait until all the processes are done
736                 for (int i=0;i<processIDS.size();i++) { 
737                         int temp = processIDS[i];
738                         wait(&temp);
739                 }
740                 
741                 //append files
742                 for(int i=0;i<processIDS.size();i++){
743                         
744                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
745                         
746                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
747                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
748                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
749                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
750                         
751                         if(qFileName != ""){
752                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
753                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
754                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
755                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
756                         }
757                         
758                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
759                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
760                         
761                         
762                         if(allFiles){
763                                 for(int j=0;j<fastaFileNames.size();j++){
764                                         for(int k=0;k<fastaFileNames[j].size();k++){
765                                                 if (fastaFileNames[j][k] != "") {
766                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
767                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
768                                                         
769                                                         if(qFileName != ""){
770                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
771                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
772                                                         }
773                                                 }
774                                         }
775                                 }
776                         }
777                         
778                         ifstream in;
779                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
780                         m->openInputFile(tempFile, in);
781                         int count = 0; 
782                         int tempNum;
783                         while (!in.eof()) { 
784                                 in >> tempNum; m->gobble(in);
785                                 groupCounts[count] += tempNum; 
786                                 count++;
787                         }
788                         in.close(); remove(tempFile.c_str());
789                         
790                 }
791         
792                 return exitCommand;
793 #endif          
794         }
795         catch(exception& e) {
796                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
797                 exit(1);
798         }
799 }
800
801 /**************************************************************************************************/
802
803 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
804         try {
805                 
806                 //set file positions for fasta file
807                 fastaFilePos = m->divideFile(filename, processors);
808                 
809                 if (qfilename == "") { return processors; }
810                 
811                 //get name of first sequence in each chunk
812                 map<string, int> firstSeqNames;
813                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
814                         ifstream in;
815                         m->openInputFile(filename, in);
816                         in.seekg(fastaFilePos[i]);
817                 
818                         Sequence temp(in); 
819                         firstSeqNames[temp.getName()] = i;
820                 
821                         in.close();
822                 }
823                                 
824                 //seach for filePos of each first name in the qfile and save in qfileFilePos
825                 ifstream inQual;
826                 m->openInputFile(qfilename, inQual);
827                 
828                 string input;
829                 while(!inQual.eof()){   
830                         input = m->getline(inQual);
831
832                         if (input.length() != 0) {
833                                 if(input[0] == '>'){ //this is a sequence name line
834                                         istringstream nameStream(input);
835                                         
836                                         string sname = "";  nameStream >> sname;
837                                         sname = sname.substr(1);
838                                         
839                                         map<string, int>::iterator it = firstSeqNames.find(sname);
840                                         
841                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
842                                                 unsigned long int pos = inQual.tellg(); 
843                                                 qfileFilePos.push_back(pos - input.length() - 1);       
844                                                 firstSeqNames.erase(it);
845                                         }
846                                 }
847                         }
848                         
849                         if (firstSeqNames.size() == 0) { break; }
850                 }
851                 inQual.close();
852                 
853                 
854                 if (firstSeqNames.size() != 0) { 
855                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
856                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
857                         }
858                         qFileName = "";
859                         return processors;
860                 }
861
862                 //get last file position of qfile
863                 FILE * pFile;
864                 unsigned long int size;
865                 
866                 //get num bytes in file
867                 pFile = fopen (qfilename.c_str(),"rb");
868                 if (pFile==NULL) perror ("Error opening file");
869                 else{
870                         fseek (pFile, 0, SEEK_END);
871                         size=ftell (pFile);
872                         fclose (pFile);
873                 }
874                 
875                 qfileFilePos.push_back(size);
876                 
877                 return processors;
878         }
879         catch(exception& e) {
880                 m->errorOut(e, "TrimSeqsCommand", "setLines");
881                 exit(1);
882         }
883 }
884
885 //***************************************************************************************************************
886
887 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
888         try {
889                 ifstream inOligos;
890                 m->openInputFile(oligoFile, inOligos);
891                 
892                 ofstream test;
893                 
894                 string type, oligo, group;
895
896                 int indexPrimer = 0;
897                 int indexBarcode = 0;
898                 
899                 while(!inOligos.eof()){
900
901                         inOligos >> type; m->gobble(inOligos);
902                                         
903                         if(type[0] == '#'){
904                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
905                         }
906                         else{
907                                 //make type case insensitive
908                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
909                                 
910                                 inOligos >> oligo;
911                                 
912                                 for(int i=0;i<oligo.length();i++){
913                                         oligo[i] = toupper(oligo[i]);
914                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
915                                 }
916                                 
917                                 if(type == "FORWARD"){
918                                         group = "";
919                                         
920                                         // get rest of line in case there is a primer name
921                                         while (!inOligos.eof()) {       
922                                                 char c = inOligos.get(); 
923                                                 if (c == 10 || c == 13){        break;  }
924                                                 else if (c == 32 || c == 9){;} //space or tab
925                                                 else {  group += c;  }
926                                         } 
927                                         
928                                         //check for repeat barcodes
929                                         map<string, int>::iterator itPrime = primers.find(oligo);
930                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
931                                         
932                                         primers[oligo]=indexPrimer; indexPrimer++;              
933                                         primerNameVector.push_back(group);
934                                 }
935                                 else if(type == "REVERSE"){
936                                         Sequence oligoRC("reverse", oligo);
937                                         oligoRC.reverseComplement();
938                                         revPrimer.push_back(oligoRC.getUnaligned());
939                                 }
940                                 else if(type == "BARCODE"){
941                                         inOligos >> group;
942                                         
943                                         //check for repeat barcodes
944                                         map<string, int>::iterator itBar = barcodes.find(oligo);
945                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
946                                                 
947                                         barcodes[oligo]=indexBarcode; indexBarcode++;
948                                         barcodeNameVector.push_back(group);
949                                 }
950                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
951                         }
952                         m->gobble(inOligos);
953                 }       
954                 inOligos.close();
955                 
956                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
957                 
958                 //add in potential combos
959                 if(barcodeNameVector.size() == 0){
960                         barcodes[""] = 0;
961                         barcodeNameVector.push_back("");                        
962                 }
963                 
964                 if(primerNameVector.size() == 0){
965                         primers[""] = 0;
966                         primerNameVector.push_back("");                 
967                 }
968                 
969                 fastaFileNames.resize(barcodeNameVector.size());
970                 for(int i=0;i<fastaFileNames.size();i++){
971                         fastaFileNames[i].assign(primerNameVector.size(), "");
972                 }
973                 if(qFileName != ""){    qualFileNames = fastaFileNames; }
974                 
975                 if(allFiles){
976                         set<string> uniqueNames; //used to cleanup outputFileNames
977                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
978                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
979                                         
980                                         string primerName = primerNameVector[itPrimer->second];
981                                         string barcodeName = barcodeNameVector[itBar->second];
982                                         
983                                         string comboGroupName = "";
984                                         string fastaFileName = "";
985                                         string qualFileName = "";
986                                         
987                                         if(primerName == ""){
988                                                 comboGroupName = barcodeNameVector[itBar->second];
989                                         }
990                                         else{
991                                                 if(barcodeName == ""){
992                                                         comboGroupName = primerNameVector[itPrimer->second];
993                                                 }
994                                                 else{
995                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
996                                                 }
997                                         }
998
999                                         ofstream temp;
1000                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1001                                         if (uniqueNames.count(fastaFileName) == 0) {
1002                                                 outputNames.push_back(fastaFileName);
1003                                                 outputTypes["fasta"].push_back(fastaFileName);
1004                                                 uniqueNames.insert(fastaFileName);
1005                                         }
1006                                         
1007                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1008                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1009
1010                                         if(qFileName != ""){
1011                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1012                                                 if (uniqueNames.count(fastaFileName) == 0) {
1013                                                         outputNames.push_back(qualFileName);
1014                                                         outputTypes["qfile"].push_back(qualFileName);
1015                                                 }
1016                                                 
1017                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1018                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1019                                         }
1020                                 }
1021                         }
1022                 }
1023                 numFPrimers = primers.size();
1024                 numRPrimers = revPrimer.size();
1025                 groupCounts.resize(barcodeNameVector.size(), 0);
1026
1027         }
1028         catch(exception& e) {
1029                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1030                 exit(1);
1031         }
1032 }
1033
1034 //***************************************************************************************************************
1035
1036 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1037         try {
1038                 
1039                 string rawSequence = seq.getUnaligned();
1040                 int success = bdiffs + 1;       //guilty until proven innocent
1041                 
1042                 //can you find the barcode
1043                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1044                         string oligo = it->first;
1045                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1046                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1047                                 break;  
1048                         }
1049                         
1050                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1051                                 group = it->second;
1052                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1053                                 
1054                                 if(qual.getName() != ""){
1055                                         qual.trimQScores(oligo.length(), -1);
1056                                 }
1057                                 
1058                                 success = 0;
1059                                 break;
1060                         }
1061                 }
1062                 
1063                 //if you found the barcode or if you don't want to allow for diffs
1064                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1065                 
1066                 else { //try aligning and see if you can find it
1067
1068                         int maxLength = 0;
1069
1070                         Alignment* alignment;
1071                         if (barcodes.size() > 0) {
1072                                 map<string,int>::iterator it=barcodes.begin();
1073
1074                                 for(it;it!=barcodes.end();it++){
1075                                         if(it->first.length() > maxLength){
1076                                                 maxLength = it->first.length();
1077                                         }
1078                                 }
1079                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1080
1081                         }else{ alignment = NULL; } 
1082                         
1083                         //can you find the barcode
1084                         int minDiff = 1e6;
1085                         int minCount = 1;
1086                         int minGroup = -1;
1087                         int minPos = 0;
1088                         
1089                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1090                                 string oligo = it->first;
1091 //                              int length = oligo.length();
1092                                 
1093                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1094                                         success = bdiffs + 10;
1095                                         break;
1096                                 }
1097                                 
1098                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1099                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1100                                 oligo = alignment->getSeqAAln();
1101                                 string temp = alignment->getSeqBAln();
1102                 
1103                                 int alnLength = oligo.length();
1104                                 
1105                                 for(int i=oligo.length()-1;i>=0;i--){
1106                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1107                                 }
1108                                 oligo = oligo.substr(0,alnLength);
1109                                 temp = temp.substr(0,alnLength);
1110                                 
1111                                 int numDiff = countDiffs(oligo, temp);
1112                                 
1113                                 if(numDiff < minDiff){
1114                                         minDiff = numDiff;
1115                                         minCount = 1;
1116                                         minGroup = it->second;
1117                                         minPos = 0;
1118                                         for(int i=0;i<alnLength;i++){
1119                                                 if(temp[i] != '-'){
1120                                                         minPos++;
1121                                                 }
1122                                         }
1123                                 }
1124                                 else if(numDiff == minDiff){
1125                                         minCount++;
1126                                 }
1127
1128                         }
1129
1130                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1131                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1132                         else{                                                                                                   //use the best match
1133                                 group = minGroup;
1134                                 seq.setUnaligned(rawSequence.substr(minPos));
1135                                 
1136                                 if(qual.getName() != ""){
1137                                         qual.trimQScores(minPos, -1);
1138                                 }
1139                                 success = minDiff;
1140                         }
1141                         
1142                         if (alignment != NULL) {  delete alignment;  }
1143                         
1144                 }
1145                 
1146                 return success;
1147                 
1148         }
1149         catch(exception& e) {
1150                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1151                 exit(1);
1152         }
1153
1154 }
1155
1156 //***************************************************************************************************************
1157
1158 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1159         try {
1160                 string rawSequence = seq.getUnaligned();
1161                 int success = pdiffs + 1;       //guilty until proven innocent
1162                 
1163                 //can you find the primer
1164                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1165                         string oligo = it->first;
1166                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1167                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1168                                 break;  
1169                         }
1170                         
1171                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1172                                 group = it->second;
1173                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1174                                 if(qual.getName() != ""){
1175                                         qual.trimQScores(oligo.length(), -1);
1176                                 }
1177                                 success = 0;
1178                                 break;
1179                         }
1180                 }
1181
1182                 //if you found the barcode or if you don't want to allow for diffs
1183                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1184                 
1185                 else { //try aligning and see if you can find it
1186
1187                         int maxLength = 0;
1188
1189                         Alignment* alignment;
1190                         if (primers.size() > 0) {
1191                                 map<string,int>::iterator it=primers.begin();
1192
1193                                 for(it;it!=primers.end();it++){
1194                                         if(it->first.length() > maxLength){
1195                                                 maxLength = it->first.length();
1196                                         }
1197                                 }
1198                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1199
1200                         }else{ alignment = NULL; } 
1201                         
1202                         //can you find the barcode
1203                         int minDiff = 1e6;
1204                         int minCount = 1;
1205                         int minGroup = -1;
1206                         int minPos = 0;
1207                         
1208                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1209                                 string oligo = it->first;
1210 //                              int length = oligo.length();
1211                                 
1212                                 if(rawSequence.length() < maxLength){   
1213                                         success = pdiffs + 100;
1214                                         break;
1215                                 }
1216                                 
1217                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1218                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1219                                 oligo = alignment->getSeqAAln();
1220                                 string temp = alignment->getSeqBAln();
1221                 
1222                                 int alnLength = oligo.length();
1223                                 
1224                                 for(int i=oligo.length()-1;i>=0;i--){
1225                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1226                                 }
1227                                 oligo = oligo.substr(0,alnLength);
1228                                 temp = temp.substr(0,alnLength);
1229                                 
1230                                 int numDiff = countDiffs(oligo, temp);
1231                                 
1232                                 if(numDiff < minDiff){
1233                                         minDiff = numDiff;
1234                                         minCount = 1;
1235                                         minGroup = it->second;
1236                                         minPos = 0;
1237                                         for(int i=0;i<alnLength;i++){
1238                                                 if(temp[i] != '-'){
1239                                                         minPos++;
1240                                                 }
1241                                         }
1242                                 }
1243                                 else if(numDiff == minDiff){
1244                                         minCount++;
1245                                 }
1246
1247                         }
1248
1249                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1250                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1251                         else{                                                                                                   //use the best match
1252                                 group = minGroup;
1253                                 seq.setUnaligned(rawSequence.substr(minPos));
1254                                 if(qual.getName() != ""){
1255                                         qual.trimQScores(minPos, -1);
1256                                 }
1257                                 success = minDiff;
1258                         }
1259                         
1260                         if (alignment != NULL) {  delete alignment;  }
1261                         
1262                 }
1263                 
1264                 return success;
1265
1266         }
1267         catch(exception& e) {
1268                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1269                 exit(1);
1270         }
1271 }
1272
1273 //***************************************************************************************************************
1274
1275 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1276         try {
1277                 string rawSequence = seq.getUnaligned();
1278                 bool success = 0;       //guilty until proven innocent
1279                 
1280                 for(int i=0;i<numRPrimers;i++){
1281                         string oligo = revPrimer[i];
1282                         
1283                         if(rawSequence.length() < oligo.length()){
1284                                 success = 0;
1285                                 break;
1286                         }
1287                         
1288                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1289                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1290                                 if(qual.getName() != ""){
1291                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1292                                 }
1293                                 success = 1;
1294                                 break;
1295                         }
1296                 }       
1297                 return success;
1298                 
1299         }
1300         catch(exception& e) {
1301                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1302                 exit(1);
1303         }
1304 }
1305
1306 //***************************************************************************************************************
1307
1308 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1309         try {
1310                 bool success = 1;
1311                 if(qscores.getName() != ""){
1312                         qscores.trimQScores(-1, keepFirst);
1313                 }
1314                 sequence.trim(keepFirst);
1315                 return success;
1316         }
1317         catch(exception& e) {
1318                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1319                 exit(1);
1320         }
1321         
1322 }       
1323
1324 //***************************************************************************************************************
1325
1326 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1327         try {
1328                 bool success = 0;
1329                 
1330                 int length = sequence.getNumBases() - removeLast;
1331                 
1332                 if(length > 0){
1333                         if(qscores.getName() != ""){
1334                                 qscores.trimQScores(-1, length);
1335                         }
1336                         sequence.trim(length);
1337                         success = 1;
1338                 }
1339                 else{
1340                         success = 0;
1341                 }
1342
1343                 return success;
1344         }
1345         catch(exception& e) {
1346                 m->errorOut(e, "removeLastTrim", "countDiffs");
1347                 exit(1);
1348         }
1349         
1350 }       
1351
1352 //***************************************************************************************************************
1353
1354 bool TrimSeqsCommand::cullLength(Sequence& seq){
1355         try {
1356         
1357                 int length = seq.getNumBases();
1358                 bool success = 0;       //guilty until proven innocent
1359                 
1360                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1361                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1362                 else                                                                                            {       success = 0;    }
1363                 
1364                 return success;
1365         
1366         }
1367         catch(exception& e) {
1368                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1369                 exit(1);
1370         }
1371         
1372 }
1373
1374 //***************************************************************************************************************
1375
1376 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1377         try {
1378                 int longHomoP = seq.getLongHomoPolymer();
1379                 bool success = 0;       //guilty until proven innocent
1380                 
1381                 if(longHomoP <= maxHomoP){      success = 1;    }
1382                 else                                    {       success = 0;    }
1383                 
1384                 return success;
1385         }
1386         catch(exception& e) {
1387                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1388                 exit(1);
1389         }
1390         
1391 }
1392
1393 //***************************************************************************************************************
1394
1395 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1396         try {
1397                 int numNs = seq.getAmbigBases();
1398                 bool success = 0;       //guilty until proven innocent
1399                 
1400                 if(numNs <= maxAmbig)   {       success = 1;    }
1401                 else                                    {       success = 0;    }
1402                 
1403                 return success;
1404         }
1405         catch(exception& e) {
1406                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1407                 exit(1);
1408         }
1409         
1410 }
1411
1412 //***************************************************************************************************************
1413
1414 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1415         try {
1416                 bool success = 1;
1417                 int length = oligo.length();
1418                 
1419                 for(int i=0;i<length;i++){
1420                         
1421                         if(oligo[i] != seq[i]){
1422                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1423                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1424                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1425                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1426                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1427                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1428                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1429                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1430                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1431                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1432                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1433                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1434                                 
1435                                 if(success == 0)        {       break;   }
1436                         }
1437                         else{
1438                                 success = 1;
1439                         }
1440                 }
1441                 
1442                 return success;
1443         }
1444         catch(exception& e) {
1445                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1446                 exit(1);
1447         }
1448
1449 }
1450
1451 //***************************************************************************************************************
1452
1453 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1454         try {
1455
1456                 int length = oligo.length();
1457                 int countDiffs = 0;
1458                 
1459                 for(int i=0;i<length;i++){
1460                                                                 
1461                         if(oligo[i] != seq[i]){
1462                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1463                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1464                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1465                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1466                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1467                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1468                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1469                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1470                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1471                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1472                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1473                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1474                         }
1475                         
1476                 }
1477                 
1478                 return countDiffs;
1479         }
1480         catch(exception& e) {
1481                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1482                 exit(1);
1483         }
1484
1485 }
1486
1487 //***************************************************************************************************************