]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
56fd8c54b53aaff9f4e43e8e694fc71f6987fd7c
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::getValidParameters(){   
15         try {
16                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
17                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 TrimSeqsCommand::TrimSeqsCommand(){     
28         try {
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["fasta"] = tempOutNames;
32                 outputTypes["qual"] = tempOutNames;
33                 outputTypes["group"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> TrimSeqsCommand::getRequiredParameters(){        
42         try {
43                 string Array[] =  {"fasta"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> TrimSeqsCommand::getRequiredFiles(){     
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //***************************************************************************************************************
64
65 TrimSeqsCommand::TrimSeqsCommand(string option)  {
66         try {
67                 
68                 abort = false;
69                 comboStarts = 0;
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
77                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
78                         
79                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
80                         
81                         OptionParser parser(option);
82                         map<string,string> parameters = parser.getParameters();
83                         
84                         ValidParameters validParameter;
85                         map<string,string>::iterator it;
86                         
87                         //check to make sure all parameters are valid for command
88                         for (it = parameters.begin(); it != parameters.end(); it++) { 
89                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
90                         }
91                         
92                         //initialize outputTypes
93                         vector<string> tempOutNames;
94                         outputTypes["fasta"] = tempOutNames;
95                         outputTypes["qual"] = tempOutNames;
96                         outputTypes["group"] = tempOutNames;
97                         
98                         //if the user changes the input directory command factory will send this info to us in the output parameter 
99                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
100                         if (inputDir == "not found"){   inputDir = "";          }
101                         else {
102                                 string path;
103                                 it = parameters.find("fasta");
104                                 //user has given a template file
105                                 if(it != parameters.end()){ 
106                                         path = m->hasPath(it->second);
107                                         //if the user has not given a path then, add inputdir. else leave path alone.
108                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
109                                 }
110                                 
111                                 it = parameters.find("oligos");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
117                                 }
118                                 
119                                 it = parameters.find("qfile");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
125                                 }
126                         }
127
128                         
129                         //check for required parameters
130                         fastaFile = validParameter.validFile(parameters, "fasta", true);
131                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
132                         else if (fastaFile == "not open") { abort = true; }     
133                         
134                         //if the user changes the output directory command factory will send this info to us in the output parameter 
135                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
136                                 outputDir = ""; 
137                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
138                         }
139                 
140                         
141                         //check for optional parameter and set defaults
142                         // ...at some point should added some additional type checking...
143                         string temp;
144                         temp = validParameter.validFile(parameters, "flip", false);
145                         if (temp == "not found"){       flip = 0;       }
146                         else if(m->isTrue(temp))        {       flip = 1;       }
147                 
148                         temp = validParameter.validFile(parameters, "oligos", true);
149                         if (temp == "not found"){       oligoFile = "";         }
150                         else if(temp == "not open"){    abort = true;   } 
151                         else                                    {       oligoFile = temp;               }
152                         
153                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
154                         convert(temp, maxAmbig);  
155
156                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
157                         convert(temp, maxHomoP);  
158
159                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
160                         convert(temp, minLength); 
161                         
162                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
163                         convert(temp, maxLength);
164                         
165                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
166                         convert(temp, bdiffs);
167                         
168                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
169                         convert(temp, pdiffs);
170                         
171                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
172                         convert(temp, tdiffs);
173                         
174                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
175                         
176                         temp = validParameter.validFile(parameters, "qfile", true);     
177                         if (temp == "not found")        {       qFileName = "";         }
178                         else if(temp == "not open")     {       abort = true;           }
179                         else                                            {       qFileName = temp;       }
180                         
181                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
182                         convert(temp, qThreshold);
183                         
184                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
185                         qtrim = m->isTrue(temp);
186
187                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
188                         convert(temp, qRollAverage);
189
190                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
191                         convert(temp, qWindowAverage);
192
193                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "100"; }
194                         convert(temp, qWindowSize);
195
196                         temp = validParameter.validFile(parameters, "qstepsize", false);                if (temp == "not found") { temp = "10"; }
197                         convert(temp, qWindowStep);
198
199                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
200                         convert(temp, qAverage);
201                         
202                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
203                         allFiles = m->isTrue(temp);
204                         
205                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
206                         convert(temp, processors); 
207                         
208                         if(allFiles && oligoFile == ""){
209                                 m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
210                         }
211                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
212                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
213                                 qAverage=0;
214                                 qThreshold=0;
215                         }
216                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
217                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
218                                 abort = true;
219                         }
220                 }
221
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
225                 exit(1);
226         }
227 }
228 //**********************************************************************************************************************
229
230 void TrimSeqsCommand::help(){
231         try {
232                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
233                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
234                 m->mothurOut("The fasta parameter is required.\n");
235                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
236                 m->mothurOut("The oligos parameter .... The default is "".\n");
237                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
238                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
239                 m->mothurOut("The minlength parameter .... The default is 0.\n");
240                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
241                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
242                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
243                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
244                 m->mothurOut("The qfile parameter .....\n");
245                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
246                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
247                 m->mothurOut("The allfiles parameter .... The default is F.\n");
248                 m->mothurOut("The qtrim parameter .... The default is F.\n");
249                 m->mothurOut("The trim.seqs command should be in the following format: \n");
250                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
251                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
252                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
253                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
254                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
255
256         }
257         catch(exception& e) {
258                 m->errorOut(e, "TrimSeqsCommand", "help");
259                 exit(1);
260         }
261 }
262
263
264 //***************************************************************************************************************
265
266 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
267
268 //***************************************************************************************************************
269
270 int TrimSeqsCommand::execute(){
271         try{
272         
273                 if (abort == true) { return 0; }
274                 
275                 numFPrimers = 0;  //this needs to be initialized
276                 numRPrimers = 0;
277                 
278                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
279                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
280                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
281                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
282                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
283                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
284                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
285                 string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
286                 
287                 vector<string> fastaFileNames;
288                 vector<string> qualFileNames;
289                 if(oligoFile != ""){
290                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
291                         getOligos(fastaFileNames, qualFileNames);
292                 }
293
294                 vector<unsigned long int> fastaFilePos;
295                 vector<unsigned long int> qFilePos;
296                 
297                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
298                 
299                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
300                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
301                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
302                 }       
303                 if(qFileName == "")     {       qLines = lines; } //files with duds
304                 
305                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
306                                 if(processors == 1){
307                                                                                 
308                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
309                                         
310                                         for (int j = 0; j < fastaFileNames.size(); j++) {
311                                                 rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str());
312                                         }
313                                         if(qFileName != ""){
314                                                 for (int j = 0; j < qualFileNames.size(); j++) {
315                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
316                                                 }
317                                         }                                               
318
319                                 }else{
320                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
321                                         
322                                         rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str());
323                                         rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str());
324                                         rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str());
325                                         
326                                         if(qFileName != ""){
327                                                 rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str());
328                                                 rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str());
329                                         }
330                                         
331                                         
332                                         for (int j = 0; j < fastaFileNames.size(); j++) {
333                                                 rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str());
334                                         }
335                                         if(qFileName != ""){
336                                                 for (int j = 0; j < qualFileNames.size(); j++) {
337                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
338                                                 }
339                                         }                                               
340                                         
341                                         //append files
342                                         for(int i=1;i<processors;i++){
343                                                 m->appendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile);
344                                                 remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str());
345                                                 m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile);
346                                                 remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str());
347
348                                                 m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile);
349                                                 remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str());
350                                                 m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile);
351                                                 remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str());
352                                                 
353                                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
354                                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
355                                                 for (int j = 0; j < fastaFileNames.size(); j++) {
356                                                         m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]);
357                                                         remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
358                                                 }
359                                                 
360                                                 if(qFileName != ""){
361                                                         for (int j = 0; j < qualFileNames.size(); j++) {
362                                                                 m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]);
363                                                                 remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
364                                                         }
365                                                 }                                               
366                                                 
367                                                 
368                                         }
369                                 }
370                                 
371                                 if (m->control_pressed) {  return 0; }
372                 #else
373                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
374                                 
375                                 for (int j = 0; j < fastaFileNames.size(); j++) {
376                                         rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str());
377                                 }
378                                 if(qFileName != ""){
379                                         for (int j = 0; j < qualFileNames.size(); j++) {
380                                                 rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str());
381                                         }
382                                 }
383                                                                         
384                                 if (m->control_pressed) {  return 0; }
385                 #endif
386                                                 
387                                                                                 
388                 for(int i=0;i<fastaFileNames.size();i++){
389                         if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
390                         else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
391                         else {
392                                 ifstream inFASTA;
393                                 string seqName;
394                                 m->openInputFile(fastaFileNames[i], inFASTA);
395                                 ofstream outGroups;
396                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
397                                 m->openOutputFile(outGroupFilename, outGroups);
398                                 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
399                                 
400                                 string thisGroup = "";
401                                 if (i > comboStarts) {
402                                         map<string, int>::iterator itCombo;
403                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
404                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
405                                         }
406                                 }else{ thisGroup = groupVector[i]; }
407                                 
408                                 while(!inFASTA.eof()){
409                                         if(inFASTA.get() == '>'){
410                                                 inFASTA >> seqName;
411                                                 outGroups << seqName << '\t' << thisGroup << endl;
412                                         }
413                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
414                                 }
415                                 outGroups.close();
416                                 inFASTA.close();
417                         }
418                 }
419                 
420                 if(qFileName != ""){
421                         for(int i=0;i<qualFileNames.size();i++){
422                                 if (m->isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str());   }
423                                 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
424                                 else {
425                                         ifstream inQual;
426                                         string seqName;
427                                         m->openInputFile(qualFileNames[i], inQual);
428 //                                      ofstream outGroups;
429 //                                      
430 //                                      string thisGroup = "";
431 //                                      if (i > comboStarts) {
432 //                                              map<string, int>::iterator itCombo;
433 //                                              for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
434 //                                                      if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
435 //                                              }
436 //                                      }
437 //                                      else{ thisGroup = groupVector[i]; }
438                                         
439                                         inQual.close();
440                                 }
441                         }
442                 }
443                 
444                 
445                 if (m->control_pressed) { 
446                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
447                         return 0;
448                 }
449
450                 m->mothurOutEndLine();
451                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
452                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
453                 m->mothurOutEndLine();
454                 
455                 return 0;       
456                         
457         }
458         catch(exception& e) {
459                 m->errorOut(e, "TrimSeqsCommand", "execute");
460                 exit(1);
461         }
462 }
463                 
464 /**************************************************************************************/
465
466 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
467                 
468         try {
469                 
470                 ofstream outFASTA;
471                 int able = m->openOutputFile(trimFile, outFASTA);
472                 
473                 ofstream scrapFASTA;
474                 m->openOutputFile(scrapFile, scrapFASTA);
475                 
476                 ofstream outQual;
477                 ofstream scrapQual;
478                 if(qFileName != ""){
479                         m->openOutputFile(trimQFile, outQual);
480                         m->openOutputFile(scrapQFile, scrapQual);
481                 }
482                 
483                 ofstream outGroups;
484                 //vector<ofstream*> fastaFileNames;
485                 //vector<ofstream*> qualFileNames;
486                 
487                 if (oligoFile != "") {          
488                         m->openOutputFile(groupFile, outGroups);   
489                         for (int i = 0; i < fastaNames.size(); i++) {
490
491                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
492                                 fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
493                                 //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
494                                 //clear old file if it exists
495                                 ofstream temp;
496                                 m->openOutputFile(fastaNames[i], temp);
497                                 temp.close();
498                                 if(qFileName != ""){
499                                         qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
500                                         //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
501                                         //clear old file if it exists
502                                         ofstream temp2;
503                                         m->openOutputFile(qualNames[i], temp2);
504                                         temp2.close();
505                                 }
506                         #else
507                                 //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); 
508                                 fastaNames[i] = (fastaNames[i] + toString(i) + ".temp");
509                                 ofstream temp;
510                                 m->openOutputFile(fastaNames[i], temp);
511                                 temp.close();                   
512                                 if(qFileName != ""){
513                                         //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));      
514                                         qualNames[i] = (qualNames[i] + toString(i) + ".temp");
515                                         ofstream temp2;
516                                         m->openOutputFile(qualNames[i], temp2);
517                                         temp2.close();          
518                                 }
519                         #endif
520                         }
521                 }
522                 
523                 ifstream inFASTA;
524                 m->openInputFile(filename, inFASTA);
525                 inFASTA.seekg(line->start);
526                 
527                 ifstream qFile;
528                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
529                 
530                 bool done = false;
531                 int count = 0;
532         
533                 while (!done) {
534                                 
535                         if (m->control_pressed) { 
536                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
537                                 if (oligoFile != "") {   outGroups.close();   }
538                                 
539                                 //for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }   
540
541                                 if(qFileName != ""){
542                                         qFile.close();
543                                         //for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }      
544                                 }
545                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
546
547                                 return 0;
548                         }
549                         
550                         int success = 1;
551                         
552
553                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
554
555                         QualityScores currQual;
556                         if(qFileName != ""){
557                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
558                         }
559                         
560                         string origSeq = currSeq.getUnaligned();
561                         if (origSeq != "") {
562                                 int groupBar, groupPrime;
563                                 string trashCode = "";
564                                 int currentSeqsDiffs = 0;
565
566                                 if(qFileName != ""){
567                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
568                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
569                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
570                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
571
572                                         if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
573                                                 success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
574                                         }
575
576                                         if(!success)                            {       trashCode += 'q';       }
577                                 }
578                         
579                                 if(barcodes.size() != 0){
580                                         success = stripBarcode(currSeq, currQual, groupBar);
581                                         if(success > bdiffs)            {       trashCode += 'b';       }
582                                         else{ currentSeqsDiffs += success;  }
583                                 }
584
585                                 if(numFPrimers != 0){
586                                         success = stripForward(currSeq, currQual, groupPrime);
587                                         if(success > pdiffs)            {       trashCode += 'f';       }
588                                         else{ currentSeqsDiffs += success;  }
589                                 }
590                                 
591                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
592
593                                 if(numRPrimers != 0){
594                                         success = stripReverse(currSeq, currQual);
595                                         if(!success)                            {       trashCode += 'r';       }
596                                 }
597                 
598                                 if(minLength > 0 || maxLength > 0){
599                                         success = cullLength(currSeq);
600                                         if(!success)                            {       trashCode += 'l';       }
601                                 }
602                                 if(maxHomoP > 0){
603                                         success = cullHomoP(currSeq);
604                                         if(!success)                            {       trashCode += 'h';       }
605                                 }
606                                 if(maxAmbig != -1){
607                                         success = cullAmbigs(currSeq);
608                                         if(!success)                            {       trashCode += 'n';       }
609                                 }
610                                 
611                                 if(flip){       currSeq.reverseComplement();            currQual.flipQScores(); }               // should go last                       
612                                 
613                                 if(trashCode.length() == 0){
614                                         currSeq.setAligned(currSeq.getUnaligned());
615                                         currSeq.printSequence(outFASTA);
616                                         currQual.printQScores(outQual);
617                                         
618                                         if(barcodes.size() != 0){
619                                                 string thisGroup = groupVector[groupBar];
620                                                 int indexToFastaFile = groupBar;
621                                                 if (primers.size() != 0){
622                                                         //does this primer have a group
623                                                         if (groupVector[groupPrime] != "") {  
624                                                                 thisGroup += "." + groupVector[groupPrime]; 
625                                                                 indexToFastaFile = combos[thisGroup];
626                                                         }
627                                                 }
628                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
629                                                 if(allFiles){
630                                                         ofstream outTemp;
631                                                         m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
632                                                         //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
633                                                         currSeq.printSequence(outTemp);
634                                                         outTemp.close();
635                                                         
636                                                         if(qFileName != ""){
637                                                                 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
638                                                                 ofstream outTemp2;
639                                                                 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
640                                                                 currQual.printQScores(outTemp2);
641                                                                 outTemp2.close();                                                       
642                                                         }
643                                                 }
644                                         }
645                                 }
646                                 else{
647                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
648                                         currSeq.setUnaligned(origSeq);
649                                         currSeq.setAligned(origSeq);
650                                         currSeq.printSequence(scrapFASTA);
651                                         currQual.printQScores(scrapQual);
652                                 }
653                                 count++;
654                         }
655                         
656                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
657                                 unsigned long int pos = inFASTA.tellg();
658                                 if ((pos == -1) || (pos >= line->end)) { break; }
659                         #else
660                                 if (inFASTA.eof()) { break; }
661                         #endif
662                                 
663                         //report progress
664                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
665                         
666                 }
667                 //report progress
668                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
669
670                 
671                 inFASTA.close();
672                 outFASTA.close();
673                 scrapFASTA.close();
674                 if (oligoFile != "") {   outGroups.close();   }
675                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
676                 
677                 //for(int i=0;i<fastaFileNames.size();i++){
678                 //      fastaFileNames[i]->close();
679                 //      delete fastaFileNames[i];
680                 //}             
681                 
682                 //if(qFileName != ""){
683                         //for(int i=0;i<qualFileNames.size();i++){
684                                 //qualFileNames[i]->close();
685                                 //delete qualFileNames[i];
686                         //}             
687                 //}                     
688                 
689                 return count;
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
693                 exit(1);
694         }
695 }
696
697 /**************************************************************************************************/
698
699 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
700         try {
701 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
702                 int process = 0;
703                 int exitCommand = 1;
704                 processIDS.clear();
705                 
706                 //loop through and create all the processes you want
707                 while (process != processors) {
708                         int pid = fork();
709                         
710                         if (pid > 0) {
711                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
712                                 process++;
713                         }else if (pid == 0){
714                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
715                                 exit(0);
716                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
717                 }
718                 
719                 //force parent to wait until all the processes are done
720                 for (int i=0;i<processors;i++) { 
721                         int temp = processIDS[i];
722                         wait(&temp);
723                 }
724                 
725                 return exitCommand;
726 #endif          
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
730                 exit(1);
731         }
732 }
733
734 /**************************************************************************************************/
735
736 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
737         try {
738                 
739                 //set file positions for fasta file
740                 fastaFilePos = m->divideFile(filename, processors);
741                 
742                 if (qfilename == "") { return processors; }
743                 
744                 //get name of first sequence in each chunk
745                 map<string, int> firstSeqNames;
746                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
747                         ifstream in;
748                         m->openInputFile(filename, in);
749                         in.seekg(fastaFilePos[i]);
750                 
751                         Sequence temp(in); 
752                         firstSeqNames[temp.getName()] = i;
753                 
754                         in.close();
755                 }
756                                 
757                 //seach for filePos of each first name in the qfile and save in qfileFilePos
758                 ifstream inQual;
759                 m->openInputFile(qfilename, inQual);
760                         
761                 string input;
762                 while(!inQual.eof()){   
763                         input = m->getline(inQual);
764
765                         if (input.length() != 0) {
766                                 if(input[0] == '>'){ //this is a sequence name line
767                                         istringstream nameStream(input);
768                                         
769                                         string sname = "";  nameStream >> sname;
770                                         sname = sname.substr(1);
771                                         
772                                         map<string, int>::iterator it = firstSeqNames.find(sname);
773                                         
774                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
775                                                 unsigned long int pos = inQual.tellg(); 
776                                                 qfileFilePos.push_back(pos - input.length() - 1);       
777                                                 firstSeqNames.erase(it);
778                                         }
779                                 }
780                         }
781                         
782                         if (firstSeqNames.size() == 0) { break; }
783                 }
784                 inQual.close();
785                 
786                 if (firstSeqNames.size() != 0) { 
787                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
788                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
789                         }
790                         qFileName = "";
791                         return processors;
792                 }
793
794                 //get last file position of qfile
795                 FILE * pFile;
796                 unsigned long int size;
797                 
798                 //get num bytes in file
799                 pFile = fopen (qfilename.c_str(),"rb");
800                 if (pFile==NULL) perror ("Error opening file");
801                 else{
802                         fseek (pFile, 0, SEEK_END);
803                         size=ftell (pFile);
804                         fclose (pFile);
805                 }
806                 
807                 qfileFilePos.push_back(size);
808                 
809                 return processors;
810         }
811         catch(exception& e) {
812                 m->errorOut(e, "TrimSeqsCommand", "setLines");
813                 exit(1);
814         }
815 }
816 //***************************************************************************************************************
817
818 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
819         try {
820                 ifstream inOligos;
821                 m->openInputFile(oligoFile, inOligos);
822                 
823                 ofstream test;
824                 
825                 string type, oligo, group;
826                 int index=0;
827                 //int indexPrimer = 0;
828                 
829                 while(!inOligos.eof()){
830                         inOligos >> type;
831                                         
832                         if(type[0] == '#'){
833                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
834                         }
835                         else{
836                                 //make type case insensitive
837                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
838                                 
839                                 inOligos >> oligo;
840                                 
841                                 for(int i=0;i<oligo.length();i++){
842                                         oligo[i] = toupper(oligo[i]);
843                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
844                                 }
845                                 
846                                 if(type == "FORWARD"){
847                                         group = "";
848                                         
849                                         // get rest of line in case there is a primer name
850                                         while (!inOligos.eof()) {       
851                                                 char c = inOligos.get(); 
852                                                 if (c == 10 || c == 13){        break;  }
853                                                 else if (c == 32 || c == 9){;} //space or tab
854                                                 else {  group += c;  }
855                                         } 
856                                         
857                                         //check for repeat barcodes
858                                         map<string, int>::iterator itPrime = primers.find(oligo);
859                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
860                                         
861                                         primers[oligo]=index; index++;
862                                         groupVector.push_back(group);
863                                         
864                                         if(allFiles){
865                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
866                                                 if(qFileName != ""){
867                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
868                                                 }
869                                                 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
870                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
871                                                         if(qFileName != ""){
872                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
873                                                         }
874                                                 }else {
875                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
876                                                         outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
877                                                         if(qFileName != ""){
878                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
879                                                                 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
880                                                         }                                                       
881                                                 }
882                                         }
883
884                                 }
885                                 else if(type == "REVERSE"){
886                                         Sequence oligoRC("reverse", oligo);
887                                         oligoRC.reverseComplement();
888                                         revPrimer.push_back(oligoRC.getUnaligned());
889                                 }
890                                 else if(type == "BARCODE"){
891                                         inOligos >> group;
892                                         
893                                         //check for repeat barcodes
894                                         map<string, int>::iterator itBar = barcodes.find(oligo);
895                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
896                                         
897                                         barcodes[oligo]=index; index++;
898                                         groupVector.push_back(group);
899                                         
900                                         if(allFiles){
901                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
902                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
903                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
904                                                 if(qFileName != ""){
905                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
906                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
907                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
908                                                 }                                                       
909                                         }
910                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
911                         }
912                         m->gobble(inOligos);
913                 }
914                 
915                 inOligos.close();
916                 
917                 //add in potential combos
918                 if(allFiles){
919                         comboStarts = outFASTAVec.size()-1;
920                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
921                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
922                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
923                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
924                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
925                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
926                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
927                                                 
928                                                 if(qFileName != ""){
929                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
930                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
931                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
932                                                 }
933                                         }
934                                 }
935                         }
936                 }
937                 
938                 numFPrimers = primers.size();
939                 numRPrimers = revPrimer.size();
940                 
941         }
942         catch(exception& e) {
943                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
944                 exit(1);
945         }
946 }
947 //***************************************************************************************************************
948
949 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
950         try {
951                 
952                 string rawSequence = seq.getUnaligned();
953                 int success = bdiffs + 1;       //guilty until proven innocent
954                 
955                 //can you find the barcode
956                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
957                         string oligo = it->first;
958                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
959                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
960                                 break;  
961                         }
962                         
963                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
964                                 group = it->second;
965                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
966                                 
967                                 if(qual.getName() != ""){
968                                         qual.trimQScores(oligo.length(), -1);
969                                 }
970                                 
971                                 success = 0;
972                                 break;
973                         }
974                 }
975                 
976                 //if you found the barcode or if you don't want to allow for diffs
977 //              cout << success;
978                 if ((bdiffs == 0) || (success == 0)) { return success;  }
979                 
980                 else { //try aligning and see if you can find it
981 //                      cout << endl;
982
983                         int maxLength = 0;
984
985                         Alignment* alignment;
986                         if (barcodes.size() > 0) {
987                                 map<string,int>::iterator it=barcodes.begin();
988
989                                 for(it;it!=barcodes.end();it++){
990                                         if(it->first.length() > maxLength){
991                                                 maxLength = it->first.length();
992                                         }
993                                 }
994                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
995
996                         }else{ alignment = NULL; } 
997                         
998                         //can you find the barcode
999                         int minDiff = 1e6;
1000                         int minCount = 1;
1001                         int minGroup = -1;
1002                         int minPos = 0;
1003                         
1004                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1005                                 string oligo = it->first;
1006 //                              int length = oligo.length();
1007                                 
1008                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1009                                         success = bdiffs + 10;
1010                                         break;
1011                                 }
1012                                 
1013                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1014                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1015                                 oligo = alignment->getSeqAAln();
1016                                 string temp = alignment->getSeqBAln();
1017                 
1018                                 int alnLength = oligo.length();
1019                                 
1020                                 for(int i=oligo.length()-1;i>=0;i--){
1021                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1022                                 }
1023                                 oligo = oligo.substr(0,alnLength);
1024                                 temp = temp.substr(0,alnLength);
1025                                 
1026                                 int newStart=0;
1027                                 int numDiff = countDiffs(oligo, temp);
1028                                 
1029 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1030                                 
1031                                 if(numDiff < minDiff){
1032                                         minDiff = numDiff;
1033                                         minCount = 1;
1034                                         minGroup = it->second;
1035                                         minPos = 0;
1036                                         for(int i=0;i<alnLength;i++){
1037                                                 if(temp[i] != '-'){
1038                                                         minPos++;
1039                                                 }
1040                                         }
1041                                 }
1042                                 else if(numDiff == minDiff){
1043                                         minCount++;
1044                                 }
1045
1046                         }
1047
1048                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1049                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1050                         else{                                                                                                   //use the best match
1051                                 group = minGroup;
1052                                 seq.setUnaligned(rawSequence.substr(minPos));
1053                                 
1054                                 if(qual.getName() != ""){
1055                                         qual.trimQScores(minPos, -1);
1056                                 }
1057                                 success = minDiff;
1058                         }
1059                         
1060                         if (alignment != NULL) {  delete alignment;  }
1061                         
1062                 }
1063 //              cout << success << endl;
1064                 
1065                 return success;
1066                 
1067         }
1068         catch(exception& e) {
1069                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1070                 exit(1);
1071         }
1072
1073 }
1074
1075 //***************************************************************************************************************
1076
1077 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1078         try {
1079                 string rawSequence = seq.getUnaligned();
1080                 int success = pdiffs + 1;       //guilty until proven innocent
1081                 
1082                 //can you find the primer
1083                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1084                         string oligo = it->first;
1085                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1086                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1087                                 break;  
1088                         }
1089                         
1090                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1091                                 group = it->second;
1092                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1093                                 if(qual.getName() != ""){
1094                                         qual.trimQScores(oligo.length(), -1);
1095                                         
1096                                 }
1097                                 success = 0;
1098                                 break;
1099                         }
1100                 }
1101
1102                 //if you found the barcode or if you don't want to allow for diffs
1103 //              cout << success;
1104                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1105                 
1106                 else { //try aligning and see if you can find it
1107 //                      cout << endl;
1108
1109                         int maxLength = 0;
1110
1111                         Alignment* alignment;
1112                         if (primers.size() > 0) {
1113                                 map<string,int>::iterator it=primers.begin();
1114
1115                                 for(it;it!=primers.end();it++){
1116                                         if(it->first.length() > maxLength){
1117                                                 maxLength = it->first.length();
1118                                         }
1119                                 }
1120                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1121
1122                         }else{ alignment = NULL; } 
1123                         
1124                         //can you find the barcode
1125                         int minDiff = 1e6;
1126                         int minCount = 1;
1127                         int minGroup = -1;
1128                         int minPos = 0;
1129                         
1130                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1131                                 string oligo = it->first;
1132 //                              int length = oligo.length();
1133                                 
1134                                 if(rawSequence.length() < maxLength){   
1135                                         success = pdiffs + 100;
1136                                         break;
1137                                 }
1138                                 
1139                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1140                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1141                                 oligo = alignment->getSeqAAln();
1142                                 string temp = alignment->getSeqBAln();
1143                 
1144                                 int alnLength = oligo.length();
1145                                 
1146                                 for(int i=oligo.length()-1;i>=0;i--){
1147                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1148                                 }
1149                                 oligo = oligo.substr(0,alnLength);
1150                                 temp = temp.substr(0,alnLength);
1151                                 
1152                                 int newStart=0;
1153                                 int numDiff = countDiffs(oligo, temp);
1154                                 
1155 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1156                                 
1157                                 if(numDiff < minDiff){
1158                                         minDiff = numDiff;
1159                                         minCount = 1;
1160                                         minGroup = it->second;
1161                                         minPos = 0;
1162                                         for(int i=0;i<alnLength;i++){
1163                                                 if(temp[i] != '-'){
1164                                                         minPos++;
1165                                                 }
1166                                         }
1167                                 }
1168                                 else if(numDiff == minDiff){
1169                                         minCount++;
1170                                 }
1171
1172                         }
1173
1174                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1175                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1176                         else{                                                                                                   //use the best match
1177                                 group = minGroup;
1178                                 seq.setUnaligned(rawSequence.substr(minPos));
1179                                 if(qual.getName() != ""){
1180                                         qual.trimQScores(minPos, -1);
1181                                 }
1182                                 success = minDiff;
1183                         }
1184                         
1185                         if (alignment != NULL) {  delete alignment;  }
1186                         
1187                 }
1188                 
1189                 return success;
1190
1191         }
1192         catch(exception& e) {
1193                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1194                 exit(1);
1195         }
1196 }
1197
1198 //***************************************************************************************************************
1199
1200 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1201         try {
1202                 string rawSequence = seq.getUnaligned();
1203                 bool success = 0;       //guilty until proven innocent
1204                 
1205                 for(int i=0;i<numRPrimers;i++){
1206                         string oligo = revPrimer[i];
1207                         
1208                         if(rawSequence.length() < oligo.length()){
1209                                 success = 0;
1210                                 break;
1211                         }
1212                         
1213                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1214                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1215                                 if(qual.getName() != ""){
1216                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1217                                 }
1218                                 success = 1;
1219                                 break;
1220                         }
1221                 }       
1222                 return success;
1223                 
1224         }
1225         catch(exception& e) {
1226                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1227                 exit(1);
1228         }
1229 }
1230
1231 //***************************************************************************************************************
1232
1233 bool TrimSeqsCommand::cullLength(Sequence& seq){
1234         try {
1235         
1236                 int length = seq.getNumBases();
1237                 bool success = 0;       //guilty until proven innocent
1238                 
1239                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1240                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1241                 else                                                                                            {       success = 0;    }
1242                 
1243                 return success;
1244         
1245         }
1246         catch(exception& e) {
1247                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1248                 exit(1);
1249         }
1250         
1251 }
1252
1253 //***************************************************************************************************************
1254
1255 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1256         try {
1257                 int longHomoP = seq.getLongHomoPolymer();
1258                 bool success = 0;       //guilty until proven innocent
1259                 
1260                 if(longHomoP <= maxHomoP){      success = 1;    }
1261                 else                                    {       success = 0;    }
1262                 
1263                 return success;
1264         }
1265         catch(exception& e) {
1266                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1267                 exit(1);
1268         }
1269         
1270 }
1271
1272 //***************************************************************************************************************
1273
1274 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1275         try {
1276                 int numNs = seq.getAmbigBases();
1277                 bool success = 0;       //guilty until proven innocent
1278                 
1279                 if(numNs <= maxAmbig)   {       success = 1;    }
1280                 else                                    {       success = 0;    }
1281                 
1282                 return success;
1283         }
1284         catch(exception& e) {
1285                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1286                 exit(1);
1287         }
1288         
1289 }
1290
1291 //***************************************************************************************************************
1292
1293 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1294         try {
1295                 bool success = 1;
1296                 int length = oligo.length();
1297                 
1298                 for(int i=0;i<length;i++){
1299                         
1300                         if(oligo[i] != seq[i]){
1301                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1302                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1303                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1304                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1305                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1306                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1307                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1308                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1309                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1310                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1311                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1312                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1313                                 
1314                                 if(success == 0)        {       break;   }
1315                         }
1316                         else{
1317                                 success = 1;
1318                         }
1319                 }
1320                 
1321                 return success;
1322         }
1323         catch(exception& e) {
1324                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1325                 exit(1);
1326         }
1327
1328 }
1329 //***************************************************************************************************************
1330
1331 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1332         try {
1333
1334                 int length = oligo.length();
1335                 int countDiffs = 0;
1336                 
1337                 for(int i=0;i<length;i++){
1338                                                                 
1339                         if(oligo[i] != seq[i]){
1340                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1341                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1342                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1343                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1344                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1345                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1346                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1347                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1348                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1349                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1350                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1351                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1352                         }
1353                         
1354                 }
1355                 
1356                 return countDiffs;
1357         }
1358         catch(exception& e) {
1359                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1360                 exit(1);
1361         }
1362
1363 }
1364 //***************************************************************************************************************
1365
1366 //bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
1367 //      try {
1368 //              
1369 //              string rawSequence = seq.getUnaligned();
1370 //              int seqLength = seq.getNumBases();
1371 //              bool success = 0;       //guilty until proven innocent
1372 //              string name;
1373 //              
1374 //              qFile >> name;
1375 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1376 //              
1377 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1378 //              
1379 //              int score;
1380 //              int end = seqLength;
1381 //              
1382 //              for(int i=0;i<seqLength;i++){
1383 //                      qFile >> score;
1384 //                      
1385 //                      if(score < qThreshold){
1386 //                              end = i;
1387 //                              break;
1388 //                      }
1389 //              }
1390 //              for(int i=end+1;i<seqLength;i++){
1391 //                      qFile >> score;
1392 //              }
1393 //              
1394 //              seq.setUnaligned(rawSequence.substr(0,end));
1395 //              
1396 //              return 1;
1397 //      }
1398 //      catch(exception& e) {
1399 //              m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
1400 //              exit(1);
1401 //      }
1402 //}
1403
1404 //***************************************************************************************************************
1405
1406 //bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
1407 //      try {
1408 //              string rawSequence = seq.getUnaligned();
1409 //              int seqLength = seq.getNumBases();
1410 //              bool success = 0;       //guilty until proven innocent
1411 //              string name;
1412 //              
1413 //              qFile >> name;
1414 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1415 //              
1416 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1417 //              
1418 //              float score;    
1419 //              float average = 0;
1420 //              
1421 //              for(int i=0;i<seqLength;i++){
1422 //                      qFile >> score;
1423 //                      average += score;
1424 //              }
1425 //              average /= seqLength;
1426 //
1427 //              if(average >= qAverage) {       success = 1;    }
1428 //              else                                    {       success = 0;    }
1429 //              
1430 //              return success;
1431 //      }
1432 //      catch(exception& e) {
1433 //              m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
1434 //              exit(1);
1435 //      }
1436 //}
1437
1438 //***************************************************************************************************************