]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::getValidParameters(){   
15         try {
16                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
17                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
18                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
19                 return myArray;
20         }
21         catch(exception& e) {
22                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
23                 exit(1);
24         }
25 }
26 //**********************************************************************************************************************
27 TrimSeqsCommand::TrimSeqsCommand(){     
28         try {
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["fasta"] = tempOutNames;
32                 outputTypes["qual"] = tempOutNames;
33                 outputTypes["group"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> TrimSeqsCommand::getRequiredParameters(){        
42         try {
43                 string Array[] =  {"fasta"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> TrimSeqsCommand::getRequiredFiles(){     
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //***************************************************************************************************************
64
65 TrimSeqsCommand::TrimSeqsCommand(string option)  {
66         try {
67                 
68                 abort = false;
69                 comboStarts = 0;
70                 
71                 //allow user to run help
72                 if(option == "help") { help(); abort = true; }
73                 
74                 else {
75                         //valid paramters for this command
76                         string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
77                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
78                         
79                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
80                         
81                         OptionParser parser(option);
82                         map<string,string> parameters = parser.getParameters();
83                         
84                         ValidParameters validParameter;
85                         map<string,string>::iterator it;
86                         
87                         //check to make sure all parameters are valid for command
88                         for (it = parameters.begin(); it != parameters.end(); it++) { 
89                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
90                         }
91                         
92                         //initialize outputTypes
93                         vector<string> tempOutNames;
94                         outputTypes["fasta"] = tempOutNames;
95                         outputTypes["qual"] = tempOutNames;
96                         outputTypes["group"] = tempOutNames;
97                         
98                         //if the user changes the input directory command factory will send this info to us in the output parameter 
99                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
100                         if (inputDir == "not found"){   inputDir = "";          }
101                         else {
102                                 string path;
103                                 it = parameters.find("fasta");
104                                 //user has given a template file
105                                 if(it != parameters.end()){ 
106                                         path = m->hasPath(it->second);
107                                         //if the user has not given a path then, add inputdir. else leave path alone.
108                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
109                                 }
110                                 
111                                 it = parameters.find("oligos");
112                                 //user has given a template file
113                                 if(it != parameters.end()){ 
114                                         path = m->hasPath(it->second);
115                                         //if the user has not given a path then, add inputdir. else leave path alone.
116                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
117                                 }
118                                 
119                                 it = parameters.find("qfile");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
125                                 }
126                         }
127
128                         
129                         //check for required parameters
130                         fastaFile = validParameter.validFile(parameters, "fasta", true);
131                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
132                         else if (fastaFile == "not open") { abort = true; }     
133                         
134                         //if the user changes the output directory command factory will send this info to us in the output parameter 
135                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
136                                 outputDir = ""; 
137                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
138                         }
139                 
140                         
141                         //check for optional parameter and set defaults
142                         // ...at some point should added some additional type checking...
143                         string temp;
144                         temp = validParameter.validFile(parameters, "flip", false);
145                         if (temp == "not found"){       flip = 0;       }
146                         else if(m->isTrue(temp))        {       flip = 1;       }
147                 
148                         temp = validParameter.validFile(parameters, "oligos", true);
149                         if (temp == "not found"){       oligoFile = "";         }
150                         else if(temp == "not open"){    abort = true;   } 
151                         else                                    {       oligoFile = temp;               }
152                         
153                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
154                         convert(temp, maxAmbig);  
155
156                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
157                         convert(temp, maxHomoP);  
158
159                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
160                         convert(temp, minLength); 
161                         
162                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
163                         convert(temp, maxLength);
164                         
165                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
166                         convert(temp, bdiffs);
167                         
168                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
169                         convert(temp, pdiffs);
170                         
171                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
172                         convert(temp, tdiffs);
173                         
174                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
175                         
176                         temp = validParameter.validFile(parameters, "qfile", true);     
177                         if (temp == "not found")        {       qFileName = "";         }
178                         else if(temp == "not open")     {       abort = true;           }
179                         else                                            {       qFileName = temp;       }
180                         
181                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
182                         convert(temp, qThreshold);
183                         
184                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
185                         qtrim = m->isTrue(temp);
186
187                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
188                         convert(temp, qRollAverage);
189
190                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
191                         convert(temp, qWindowAverage);
192
193                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "100"; }
194                         convert(temp, qWindowSize);
195
196                         temp = validParameter.validFile(parameters, "qstepsize", false);                if (temp == "not found") { temp = "10"; }
197                         convert(temp, qWindowStep);
198
199                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
200                         convert(temp, qAverage);
201                         
202                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
203                         allFiles = m->isTrue(temp);
204                         
205                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
206                         convert(temp, processors); 
207                         
208                         if(allFiles && oligoFile == ""){
209                                 m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
210                         }
211                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
212                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
213                                 qAverage=0;
214                                 qThreshold=0;
215                         }
216                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
217                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
218                                 abort = true;
219                         }
220                 }
221
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
225                 exit(1);
226         }
227 }
228 //**********************************************************************************************************************
229
230 void TrimSeqsCommand::help(){
231         try {
232                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
233                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
234                 m->mothurOut("The fasta parameter is required.\n");
235                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
236                 m->mothurOut("The oligos parameter .... The default is "".\n");
237                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
238                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
239                 m->mothurOut("The minlength parameter .... The default is 0.\n");
240                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
241                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
242                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
243                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
244                 m->mothurOut("The qfile parameter .....\n");
245                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
246                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
247                 m->mothurOut("The allfiles parameter .... The default is F.\n");
248                 m->mothurOut("The qtrim parameter .... The default is F.\n");
249                 m->mothurOut("The trim.seqs command should be in the following format: \n");
250                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
251                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
252                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
253                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
254                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
255
256         }
257         catch(exception& e) {
258                 m->errorOut(e, "TrimSeqsCommand", "help");
259                 exit(1);
260         }
261 }
262
263
264 //***************************************************************************************************************
265
266 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
267
268 //***************************************************************************************************************
269
270 int TrimSeqsCommand::execute(){
271         try{
272         
273                 if (abort == true) { return 0; }
274                 
275                 numFPrimers = 0;  //this needs to be initialized
276                 numRPrimers = 0;
277                 
278                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
279                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
280                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
281                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
282                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
283                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
284                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
285                 string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
286                 
287                 vector<string> fastaFileNames;
288                 vector<string> qualFileNames;
289                 if(oligoFile != ""){
290                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
291                         getOligos(fastaFileNames, qualFileNames);
292                 }
293
294                 vector<unsigned long int> fastaFilePos;
295                 vector<unsigned long int> qFilePos;
296                 
297                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
298                 
299                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
300                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
301                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
302                 }       
303                 if(qFileName == "")     {       qLines = lines; } //files with duds
304                 
305                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
306                                 if(processors == 1){
307                                                                                 
308                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
309                                         
310                                         for (int j = 0; j < fastaFileNames.size(); j++) {
311                                                 rename((fastaFileNames[j] + toString(getpid()) + ".temp").c_str(), fastaFileNames[j].c_str());
312                                         }
313                                         if(qFileName != ""){
314                                                 for (int j = 0; j < qualFileNames.size(); j++) {
315                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
316                                                 }
317                                         }                                               
318
319                                 }else{
320                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
321                                         
322                                         rename((trimSeqFile + toString(processIDS[0]) + ".temp").c_str(), trimSeqFile.c_str());
323                                         rename((scrapSeqFile + toString(processIDS[0]) + ".temp").c_str(), scrapSeqFile.c_str());
324                                         rename((groupFile + toString(processIDS[0]) + ".temp").c_str(), groupFile.c_str());
325                                         
326                                         if(qFileName != ""){
327                                                 rename((trimQualFile + toString(processIDS[0]) + ".temp").c_str(), trimQualFile.c_str());
328                                                 rename((scrapQualFile + toString(processIDS[0]) + ".temp").c_str(), scrapQualFile.c_str());
329                                         }
330                                         
331                                         
332                                         for (int j = 0; j < fastaFileNames.size(); j++) {
333                                                 rename((fastaFileNames[j] + toString(processIDS[0]) + ".temp").c_str(), fastaFileNames[j].c_str());
334                                         }
335                                         if(qFileName != ""){
336                                                 for (int j = 0; j < qualFileNames.size(); j++) {
337                                                         rename((qualFileNames[j] + toString(getpid()) + ".temp").c_str(), qualFileNames[j].c_str());
338                                                 }
339                                         }                                               
340                                         
341                                         //append files
342                                         for(int i=1;i<processors;i++){
343                                                 m->appendFiles((trimSeqFile + toString(processIDS[i]) + ".temp"), trimSeqFile);
344                                                 remove((trimSeqFile + toString(processIDS[i]) + ".temp").c_str());
345                                                 m->appendFiles((scrapSeqFile + toString(processIDS[i]) + ".temp"), scrapSeqFile);
346                                                 remove((scrapSeqFile + toString(processIDS[i]) + ".temp").c_str());
347
348                                                 m->appendFiles((trimQualFile + toString(processIDS[i]) + ".temp"), trimQualFile);
349                                                 remove((trimQualFile + toString(processIDS[i]) + ".temp").c_str());
350                                                 m->appendFiles((scrapQualFile + toString(processIDS[i]) + ".temp"), scrapQualFile);
351                                                 remove((scrapQualFile + toString(processIDS[i]) + ".temp").c_str());
352                                                 
353                                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
354                                                 remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
355                                                 for (int j = 0; j < fastaFileNames.size(); j++) {
356                                                         m->appendFiles((fastaFileNames[j] + toString(processIDS[i]) + ".temp"), fastaFileNames[j]);
357                                                         remove((fastaFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
358                                                 }
359                                                 
360                                                 if(qFileName != ""){
361                                                         for (int j = 0; j < qualFileNames.size(); j++) {
362                                                                 m->appendFiles((qualFileNames[j] + toString(processIDS[i]) + ".temp"), qualFileNames[j]);
363                                                                 remove((qualFileNames[j] + toString(processIDS[i]) + ".temp").c_str());
364                                                         }
365                                                 }                                               
366                                                 
367                                                 
368                                         }
369                                 }
370                                 
371                                 if (m->control_pressed) {  return 0; }
372                 #else
373                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
374                                 
375                                 for (int j = 0; j < fastaFileNames.size(); j++) {
376                                         rename((fastaFileNames[j] + toString(j) + ".temp").c_str(), fastaFileNames[j].c_str());
377                                 }
378                                 if(qFileName != ""){
379                                         for (int j = 0; j < qualFileNames.size(); j++) {
380                                                 rename((qualFileNames[j] + toString(j) + ".temp").c_str(), qualFileNames[j].c_str());
381                                         }
382                                 }
383                                                                         
384                                 if (m->control_pressed) {  return 0; }
385                 #endif
386                                                 
387                                                                                 
388                 for(int i=0;i<fastaFileNames.size();i++){
389                         if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
390                         else if (filesToRemove.count(fastaFileNames[i]) > 0) { remove(fastaFileNames[i].c_str()); }
391                         else {
392                                 ifstream inFASTA;
393                                 string seqName;
394                                 m->openInputFile(fastaFileNames[i], inFASTA);
395                                 ofstream outGroups;
396                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
397                                 m->openOutputFile(outGroupFilename, outGroups);
398                                 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
399                                 
400                                 string thisGroup = "";
401                                 if (i > comboStarts) {
402                                         map<string, int>::iterator itCombo;
403                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
404                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
405                                         }
406                                 }else{ thisGroup = groupVector[i]; }
407                                 
408                                 while(!inFASTA.eof()){
409                                         if(inFASTA.get() == '>'){
410                                                 inFASTA >> seqName;
411                                                 outGroups << seqName << '\t' << thisGroup << endl;
412                                         }
413                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
414                                 }
415                                 outGroups.close();
416                                 inFASTA.close();
417                         }
418                 }
419                 
420                 if(qFileName != ""){
421                         for(int i=0;i<qualFileNames.size();i++){
422                                 if (m->isBlank(qualFileNames[i])) { remove(qualFileNames[i].c_str());   }
423                                 else if (filesToRemove.count(qualFileNames[i]) > 0) { remove(qualFileNames[i].c_str()); }
424                                 else {
425                                         ifstream inQual;
426                                         string seqName;
427                                         m->openInputFile(qualFileNames[i], inQual);
428 //                                      ofstream outGroups;
429 //                                      
430 //                                      string thisGroup = "";
431 //                                      if (i > comboStarts) {
432 //                                              map<string, int>::iterator itCombo;
433 //                                              for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
434 //                                                      if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
435 //                                              }
436 //                                      }
437 //                                      else{ thisGroup = groupVector[i]; }
438                                         
439                                         inQual.close();
440                                 }
441                         }
442                 }
443                 
444                 
445                 if (m->control_pressed) { 
446                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
447                         return 0;
448                 }
449
450                 m->mothurOutEndLine();
451                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
452                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
453                 m->mothurOutEndLine();
454                 
455                 return 0;       
456                         
457         }
458         catch(exception& e) {
459                 m->errorOut(e, "TrimSeqsCommand", "execute");
460                 exit(1);
461         }
462 }
463                 
464 /**************************************************************************************/
465
466 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
467                 
468         try {
469                 
470                 ofstream outFASTA;
471                 int able = m->openOutputFile(trimFile, outFASTA);
472                 
473                 ofstream scrapFASTA;
474                 m->openOutputFile(scrapFile, scrapFASTA);
475                 
476                 ofstream outQual;
477                 ofstream scrapQual;
478                 if(qFileName != ""){
479                         m->openOutputFile(trimQFile, outQual);
480                         m->openOutputFile(scrapQFile, scrapQual);
481                 }
482                 
483                 ofstream outGroups;
484                 vector<ofstream*> fastaFileNames;
485                 vector<ofstream*> qualFileNames;
486                 
487                 if (oligoFile != "") {          
488                         m->openOutputFile(groupFile, outGroups);   
489                         for (int i = 0; i < fastaNames.size(); i++) {
490
491                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
492                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
493                                 if(qFileName != ""){
494                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
495                                 }
496                         #else
497                                 fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
498                                 if(qFileName != ""){
499                                         qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));                        
500                                 }
501                         #endif
502                         }
503                 }
504                 
505                 ifstream inFASTA;
506                 m->openInputFile(filename, inFASTA);
507                 inFASTA.seekg(line->start);
508                 
509                 ifstream qFile;
510                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
511                 
512                 bool done = false;
513                 int count = 0;
514         
515                 while (!done) {
516                                 
517                         if (m->control_pressed) { 
518                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
519                                 if (oligoFile != "") {   outGroups.close();   }
520                                 
521                                 for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
522
523                                 if(qFileName != ""){
524                                         qFile.close();
525                                         for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }        
526                                 }
527                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
528
529                                 return 0;
530                         }
531                         
532                         int success = 1;
533                         
534
535                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
536
537                         QualityScores currQual;
538                         if(qFileName != ""){
539                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
540                         }
541                         
542                         string origSeq = currSeq.getUnaligned();
543                         if (origSeq != "") {
544                                 int groupBar, groupPrime;
545                                 string trashCode = "";
546                                 int currentSeqsDiffs = 0;
547
548                                 if(qFileName != ""){
549                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
550                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
551                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
552                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
553
554                                         if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
555                                                 success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
556                                         }
557
558                                         if(!success)                            {       trashCode += 'q';       }
559                                 }
560                         
561                                 if(barcodes.size() != 0){
562                                         success = stripBarcode(currSeq, currQual, groupBar);
563                                         if(success > bdiffs)            {       trashCode += 'b';       }
564                                         else{ currentSeqsDiffs += success;  }
565                                 }
566
567                                 if(numFPrimers != 0){
568                                         success = stripForward(currSeq, currQual, groupPrime);
569                                         if(success > pdiffs)            {       trashCode += 'f';       }
570                                         else{ currentSeqsDiffs += success;  }
571                                 }
572                                 
573                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
574
575                                 if(numRPrimers != 0){
576                                         success = stripReverse(currSeq, currQual);
577                                         if(!success)                            {       trashCode += 'r';       }
578                                 }
579                 
580                                 if(minLength > 0 || maxLength > 0){
581                                         success = cullLength(currSeq);
582                                         if(!success)                            {       trashCode += 'l';       }
583                                 }
584                                 if(maxHomoP > 0){
585                                         success = cullHomoP(currSeq);
586                                         if(!success)                            {       trashCode += 'h';       }
587                                 }
588                                 if(maxAmbig != -1){
589                                         success = cullAmbigs(currSeq);
590                                         if(!success)                            {       trashCode += 'n';       }
591                                 }
592                                 
593                                 if(flip){       currSeq.reverseComplement();            currQual.flipQScores(); }               // should go last                       
594                                 
595                                 if(trashCode.length() == 0){
596                                         currSeq.setAligned(currSeq.getUnaligned());
597                                         currSeq.printSequence(outFASTA);
598                                         currQual.printQScores(outQual);
599                                         
600                                         if(barcodes.size() != 0){
601                                                 string thisGroup = groupVector[groupBar];
602                                                 int indexToFastaFile = groupBar;
603                                                 if (primers.size() != 0){
604                                                         //does this primer have a group
605                                                         if (groupVector[groupPrime] != "") {  
606                                                                 thisGroup += "." + groupVector[groupPrime]; 
607                                                                 indexToFastaFile = combos[thisGroup];
608                                                         }
609                                                 }
610                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
611                                                 if(allFiles){
612                                                         currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
613                                                         
614                                                         if(qFileName != ""){
615                                                                 currQual.printQScores(*qualFileNames[indexToFastaFile]);                                                        
616                                                         }
617                                                 }
618                                         }
619                                 }
620                                 else{
621                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
622                                         currSeq.setUnaligned(origSeq);
623                                         currSeq.setAligned(origSeq);
624                                         currSeq.printSequence(scrapFASTA);
625                                         currQual.printQScores(scrapQual);
626                                 }
627                                 count++;
628                         }
629                         
630                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
631                                 unsigned long int pos = inFASTA.tellg();
632                                 if ((pos == -1) || (pos >= line->end)) { break; }
633                         #else
634                                 if (inFASTA.eof()) { break; }
635                         #endif
636                                 
637                         //report progress
638                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
639                         
640                 }
641                 //report progress
642                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
643
644                 
645                 inFASTA.close();
646                 outFASTA.close();
647                 scrapFASTA.close();
648                 if (oligoFile != "") {   outGroups.close();   }
649                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
650                 
651                 for(int i=0;i<fastaFileNames.size();i++){
652                         fastaFileNames[i]->close();
653                         delete fastaFileNames[i];
654                 }               
655                 
656                 if(qFileName != ""){
657                         for(int i=0;i<qualFileNames.size();i++){
658                                 qualFileNames[i]->close();
659                                 delete qualFileNames[i];
660                         }               
661                 }                       
662                 
663                 return count;
664         }
665         catch(exception& e) {
666                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
667                 exit(1);
668         }
669 }
670
671 /**************************************************************************************************/
672
673 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
674         try {
675 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
676                 int process = 0;
677                 int exitCommand = 1;
678                 processIDS.clear();
679                 
680                 //loop through and create all the processes you want
681                 while (process != processors) {
682                         int pid = fork();
683                         
684                         if (pid > 0) {
685                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
686                                 process++;
687                         }else if (pid == 0){
688                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
689                                 exit(0);
690                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
691                 }
692                 
693                 //force parent to wait until all the processes are done
694                 for (int i=0;i<processors;i++) { 
695                         int temp = processIDS[i];
696                         wait(&temp);
697                 }
698                 
699                 return exitCommand;
700 #endif          
701         }
702         catch(exception& e) {
703                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
704                 exit(1);
705         }
706 }
707
708 /**************************************************************************************************/
709
710 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
711         try {
712                 
713                 //set file positions for fasta file
714                 fastaFilePos = m->divideFile(filename, processors);
715                 
716                 if (qfilename == "") { return processors; }
717                 
718                 //get name of first sequence in each chunk
719                 map<string, int> firstSeqNames;
720                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
721                         ifstream in;
722                         m->openInputFile(filename, in);
723                         in.seekg(fastaFilePos[i]);
724                 
725                         Sequence temp(in); 
726                         firstSeqNames[temp.getName()] = i;
727                 
728                         in.close();
729                 }
730                                 
731                 //seach for filePos of each first name in the qfile and save in qfileFilePos
732                 ifstream inQual;
733                 m->openInputFile(qfilename, inQual);
734                         
735                 string input;
736                 while(!inQual.eof()){   
737                         input = m->getline(inQual);
738
739                         if (input.length() != 0) {
740                                 if(input[0] == '>'){ //this is a sequence name line
741                                         istringstream nameStream(input);
742                                         
743                                         string sname = "";  nameStream >> sname;
744                                         sname = sname.substr(1);
745                                         
746                                         map<string, int>::iterator it = firstSeqNames.find(sname);
747                                         
748                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
749                                                 unsigned long int pos = inQual.tellg(); 
750                                                 qfileFilePos.push_back(pos - input.length() - 1);       
751                                                 firstSeqNames.erase(it);
752                                         }
753                                 }
754                         }
755                         
756                         if (firstSeqNames.size() == 0) { break; }
757                 }
758                 inQual.close();
759                 
760                 if (firstSeqNames.size() != 0) { 
761                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
762                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
763                         }
764                         qFileName = "";
765                         return processors;
766                 }
767
768                 //get last file position of qfile
769                 FILE * pFile;
770                 unsigned long int size;
771                 
772                 //get num bytes in file
773                 pFile = fopen (qfilename.c_str(),"rb");
774                 if (pFile==NULL) perror ("Error opening file");
775                 else{
776                         fseek (pFile, 0, SEEK_END);
777                         size=ftell (pFile);
778                         fclose (pFile);
779                 }
780                 
781                 qfileFilePos.push_back(size);
782                 
783                 return processors;
784         }
785         catch(exception& e) {
786                 m->errorOut(e, "TrimSeqsCommand", "setLines");
787                 exit(1);
788         }
789 }
790 //***************************************************************************************************************
791
792 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
793         try {
794                 ifstream inOligos;
795                 m->openInputFile(oligoFile, inOligos);
796                 
797                 ofstream test;
798                 
799                 string type, oligo, group;
800                 int index=0;
801                 //int indexPrimer = 0;
802                 
803                 while(!inOligos.eof()){
804                         inOligos >> type;
805                                         
806                         if(type[0] == '#'){
807                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
808                         }
809                         else{
810                                 //make type case insensitive
811                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
812                                 
813                                 inOligos >> oligo;
814                                 
815                                 for(int i=0;i<oligo.length();i++){
816                                         oligo[i] = toupper(oligo[i]);
817                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
818                                 }
819                                 
820                                 if(type == "FORWARD"){
821                                         group = "";
822                                         
823                                         // get rest of line in case there is a primer name
824                                         while (!inOligos.eof()) {       
825                                                 char c = inOligos.get(); 
826                                                 if (c == 10 || c == 13){        break;  }
827                                                 else if (c == 32 || c == 9){;} //space or tab
828                                                 else {  group += c;  }
829                                         } 
830                                         
831                                         //check for repeat barcodes
832                                         map<string, int>::iterator itPrime = primers.find(oligo);
833                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
834                                         
835                                         primers[oligo]=index; index++;
836                                         groupVector.push_back(group);
837                                         
838                                         if(allFiles){
839                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
840                                                 if(qFileName != ""){
841                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
842                                                 }
843                                                 if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
844                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
845                                                         if(qFileName != ""){
846                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
847                                                         }
848                                                 }else {
849                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
850                                                         outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
851                                                         if(qFileName != ""){
852                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
853                                                                 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
854                                                         }                                                       
855                                                 }
856                                         }
857
858                                 }
859                                 else if(type == "REVERSE"){
860                                         Sequence oligoRC("reverse", oligo);
861                                         oligoRC.reverseComplement();
862                                         revPrimer.push_back(oligoRC.getUnaligned());
863                                 }
864                                 else if(type == "BARCODE"){
865                                         inOligos >> group;
866                                         
867                                         //check for repeat barcodes
868                                         map<string, int>::iterator itBar = barcodes.find(oligo);
869                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
870                                         
871                                         barcodes[oligo]=index; index++;
872                                         groupVector.push_back(group);
873                                         
874                                         if(allFiles){
875                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
876                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
877                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
878                                                 if(qFileName != ""){
879                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
880                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
881                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
882                                                 }                                                       
883                                         }
884                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
885                         }
886                         m->gobble(inOligos);
887                 }
888                 
889                 inOligos.close();
890                 
891                 //add in potential combos
892                 if(allFiles){
893                         comboStarts = outFASTAVec.size()-1;
894                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
895                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
896                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
897                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
898                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
899                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
900                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
901                                                 
902                                                 if(qFileName != ""){
903                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
904                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
905                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
906                                                 }
907                                         }
908                                 }
909                         }
910                 }
911                 
912                 numFPrimers = primers.size();
913                 numRPrimers = revPrimer.size();
914                 
915         }
916         catch(exception& e) {
917                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
918                 exit(1);
919         }
920 }
921 //***************************************************************************************************************
922
923 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
924         try {
925                 
926                 string rawSequence = seq.getUnaligned();
927                 int success = bdiffs + 1;       //guilty until proven innocent
928                 
929                 //can you find the barcode
930                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
931                         string oligo = it->first;
932                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
933                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
934                                 break;  
935                         }
936                         
937                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
938                                 group = it->second;
939                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
940                                 
941                                 if(qual.getName() != ""){
942                                         qual.trimQScores(oligo.length(), -1);
943                                 }
944                                 
945                                 success = 0;
946                                 break;
947                         }
948                 }
949                 
950                 //if you found the barcode or if you don't want to allow for diffs
951 //              cout << success;
952                 if ((bdiffs == 0) || (success == 0)) { return success;  }
953                 
954                 else { //try aligning and see if you can find it
955 //                      cout << endl;
956
957                         int maxLength = 0;
958
959                         Alignment* alignment;
960                         if (barcodes.size() > 0) {
961                                 map<string,int>::iterator it=barcodes.begin();
962
963                                 for(it;it!=barcodes.end();it++){
964                                         if(it->first.length() > maxLength){
965                                                 maxLength = it->first.length();
966                                         }
967                                 }
968                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
969
970                         }else{ alignment = NULL; } 
971                         
972                         //can you find the barcode
973                         int minDiff = 1e6;
974                         int minCount = 1;
975                         int minGroup = -1;
976                         int minPos = 0;
977                         
978                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
979                                 string oligo = it->first;
980 //                              int length = oligo.length();
981                                 
982                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
983                                         success = bdiffs + 10;
984                                         break;
985                                 }
986                                 
987                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
988                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
989                                 oligo = alignment->getSeqAAln();
990                                 string temp = alignment->getSeqBAln();
991                 
992                                 int alnLength = oligo.length();
993                                 
994                                 for(int i=oligo.length()-1;i>=0;i--){
995                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
996                                 }
997                                 oligo = oligo.substr(0,alnLength);
998                                 temp = temp.substr(0,alnLength);
999                                 
1000                                 int newStart=0;
1001                                 int numDiff = countDiffs(oligo, temp);
1002                                 
1003 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1004                                 
1005                                 if(numDiff < minDiff){
1006                                         minDiff = numDiff;
1007                                         minCount = 1;
1008                                         minGroup = it->second;
1009                                         minPos = 0;
1010                                         for(int i=0;i<alnLength;i++){
1011                                                 if(temp[i] != '-'){
1012                                                         minPos++;
1013                                                 }
1014                                         }
1015                                 }
1016                                 else if(numDiff == minDiff){
1017                                         minCount++;
1018                                 }
1019
1020                         }
1021
1022                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1023                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1024                         else{                                                                                                   //use the best match
1025                                 group = minGroup;
1026                                 seq.setUnaligned(rawSequence.substr(minPos));
1027                                 
1028                                 if(qual.getName() != ""){
1029                                         qual.trimQScores(minPos, -1);
1030                                 }
1031                                 success = minDiff;
1032                         }
1033                         
1034                         if (alignment != NULL) {  delete alignment;  }
1035                         
1036                 }
1037 //              cout << success << endl;
1038                 
1039                 return success;
1040                 
1041         }
1042         catch(exception& e) {
1043                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1044                 exit(1);
1045         }
1046
1047 }
1048
1049 //***************************************************************************************************************
1050
1051 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1052         try {
1053                 string rawSequence = seq.getUnaligned();
1054                 int success = pdiffs + 1;       //guilty until proven innocent
1055                 
1056                 //can you find the primer
1057                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1058                         string oligo = it->first;
1059                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1060                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1061                                 break;  
1062                         }
1063                         
1064                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1065                                 group = it->second;
1066                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1067                                 if(qual.getName() != ""){
1068                                         qual.trimQScores(oligo.length(), -1);
1069                                         
1070                                 }
1071                                 success = 0;
1072                                 break;
1073                         }
1074                 }
1075
1076                 //if you found the barcode or if you don't want to allow for diffs
1077 //              cout << success;
1078                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1079                 
1080                 else { //try aligning and see if you can find it
1081 //                      cout << endl;
1082
1083                         int maxLength = 0;
1084
1085                         Alignment* alignment;
1086                         if (primers.size() > 0) {
1087                                 map<string,int>::iterator it=primers.begin();
1088
1089                                 for(it;it!=primers.end();it++){
1090                                         if(it->first.length() > maxLength){
1091                                                 maxLength = it->first.length();
1092                                         }
1093                                 }
1094                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1095
1096                         }else{ alignment = NULL; } 
1097                         
1098                         //can you find the barcode
1099                         int minDiff = 1e6;
1100                         int minCount = 1;
1101                         int minGroup = -1;
1102                         int minPos = 0;
1103                         
1104                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1105                                 string oligo = it->first;
1106 //                              int length = oligo.length();
1107                                 
1108                                 if(rawSequence.length() < maxLength){   
1109                                         success = pdiffs + 100;
1110                                         break;
1111                                 }
1112                                 
1113                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1114                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1115                                 oligo = alignment->getSeqAAln();
1116                                 string temp = alignment->getSeqBAln();
1117                 
1118                                 int alnLength = oligo.length();
1119                                 
1120                                 for(int i=oligo.length()-1;i>=0;i--){
1121                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1122                                 }
1123                                 oligo = oligo.substr(0,alnLength);
1124                                 temp = temp.substr(0,alnLength);
1125                                 
1126                                 int newStart=0;
1127                                 int numDiff = countDiffs(oligo, temp);
1128                                 
1129 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1130                                 
1131                                 if(numDiff < minDiff){
1132                                         minDiff = numDiff;
1133                                         minCount = 1;
1134                                         minGroup = it->second;
1135                                         minPos = 0;
1136                                         for(int i=0;i<alnLength;i++){
1137                                                 if(temp[i] != '-'){
1138                                                         minPos++;
1139                                                 }
1140                                         }
1141                                 }
1142                                 else if(numDiff == minDiff){
1143                                         minCount++;
1144                                 }
1145
1146                         }
1147
1148                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1149                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1150                         else{                                                                                                   //use the best match
1151                                 group = minGroup;
1152                                 seq.setUnaligned(rawSequence.substr(minPos));
1153                                 if(qual.getName() != ""){
1154                                         qual.trimQScores(minPos, -1);
1155                                 }
1156                                 success = minDiff;
1157                         }
1158                         
1159                         if (alignment != NULL) {  delete alignment;  }
1160                         
1161                 }
1162                 
1163                 return success;
1164
1165         }
1166         catch(exception& e) {
1167                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1168                 exit(1);
1169         }
1170 }
1171
1172 //***************************************************************************************************************
1173
1174 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1175         try {
1176                 string rawSequence = seq.getUnaligned();
1177                 bool success = 0;       //guilty until proven innocent
1178                 
1179                 for(int i=0;i<numRPrimers;i++){
1180                         string oligo = revPrimer[i];
1181                         
1182                         if(rawSequence.length() < oligo.length()){
1183                                 success = 0;
1184                                 break;
1185                         }
1186                         
1187                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1188                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1189                                 if(qual.getName() != ""){
1190                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1191                                 }
1192                                 success = 1;
1193                                 break;
1194                         }
1195                 }       
1196                 return success;
1197                 
1198         }
1199         catch(exception& e) {
1200                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1201                 exit(1);
1202         }
1203 }
1204
1205 //***************************************************************************************************************
1206
1207 bool TrimSeqsCommand::cullLength(Sequence& seq){
1208         try {
1209         
1210                 int length = seq.getNumBases();
1211                 bool success = 0;       //guilty until proven innocent
1212                 
1213                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1214                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1215                 else                                                                                            {       success = 0;    }
1216                 
1217                 return success;
1218         
1219         }
1220         catch(exception& e) {
1221                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1222                 exit(1);
1223         }
1224         
1225 }
1226
1227 //***************************************************************************************************************
1228
1229 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1230         try {
1231                 int longHomoP = seq.getLongHomoPolymer();
1232                 bool success = 0;       //guilty until proven innocent
1233                 
1234                 if(longHomoP <= maxHomoP){      success = 1;    }
1235                 else                                    {       success = 0;    }
1236                 
1237                 return success;
1238         }
1239         catch(exception& e) {
1240                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1241                 exit(1);
1242         }
1243         
1244 }
1245
1246 //***************************************************************************************************************
1247
1248 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1249         try {
1250                 int numNs = seq.getAmbigBases();
1251                 bool success = 0;       //guilty until proven innocent
1252                 
1253                 if(numNs <= maxAmbig)   {       success = 1;    }
1254                 else                                    {       success = 0;    }
1255                 
1256                 return success;
1257         }
1258         catch(exception& e) {
1259                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1260                 exit(1);
1261         }
1262         
1263 }
1264
1265 //***************************************************************************************************************
1266
1267 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1268         try {
1269                 bool success = 1;
1270                 int length = oligo.length();
1271                 
1272                 for(int i=0;i<length;i++){
1273                         
1274                         if(oligo[i] != seq[i]){
1275                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1276                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1277                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1278                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1279                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1280                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1281                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1282                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1283                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1284                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1285                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1286                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1287                                 
1288                                 if(success == 0)        {       break;   }
1289                         }
1290                         else{
1291                                 success = 1;
1292                         }
1293                 }
1294                 
1295                 return success;
1296         }
1297         catch(exception& e) {
1298                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1299                 exit(1);
1300         }
1301
1302 }
1303 //***************************************************************************************************************
1304
1305 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1306         try {
1307
1308                 int length = oligo.length();
1309                 int countDiffs = 0;
1310                 
1311                 for(int i=0;i<length;i++){
1312                                                                 
1313                         if(oligo[i] != seq[i]){
1314                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1315                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1316                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1317                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1318                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1319                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1320                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1321                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1322                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1323                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1324                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1325                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1326                         }
1327                         
1328                 }
1329                 
1330                 return countDiffs;
1331         }
1332         catch(exception& e) {
1333                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1334                 exit(1);
1335         }
1336
1337 }
1338 //***************************************************************************************************************
1339
1340 //bool TrimSeqsCommand::stripQualThreshold(Sequence& seq, ifstream& qFile){
1341 //      try {
1342 //              
1343 //              string rawSequence = seq.getUnaligned();
1344 //              int seqLength = seq.getNumBases();
1345 //              bool success = 0;       //guilty until proven innocent
1346 //              string name;
1347 //              
1348 //              qFile >> name;
1349 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1350 //              
1351 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1352 //              
1353 //              int score;
1354 //              int end = seqLength;
1355 //              
1356 //              for(int i=0;i<seqLength;i++){
1357 //                      qFile >> score;
1358 //                      
1359 //                      if(score < qThreshold){
1360 //                              end = i;
1361 //                              break;
1362 //                      }
1363 //              }
1364 //              for(int i=end+1;i<seqLength;i++){
1365 //                      qFile >> score;
1366 //              }
1367 //              
1368 //              seq.setUnaligned(rawSequence.substr(0,end));
1369 //              
1370 //              return 1;
1371 //      }
1372 //      catch(exception& e) {
1373 //              m->errorOut(e, "TrimSeqsCommand", "stripQualThreshold");
1374 //              exit(1);
1375 //      }
1376 //}
1377
1378 //***************************************************************************************************************
1379
1380 //bool TrimSeqsCommand::cullQualAverage(Sequence& seq, ifstream& qFile){
1381 //      try {
1382 //              string rawSequence = seq.getUnaligned();
1383 //              int seqLength = seq.getNumBases();
1384 //              bool success = 0;       //guilty until proven innocent
1385 //              string name;
1386 //              
1387 //              qFile >> name;
1388 //              if (name[0] == '>') {  if(name.substr(1) != seq.getName())      {       m->mothurOut("sequence name mismatch btwn fasta: " + seq.getName() + " and qual file: " + name); m->mothurOutEndLine(); }  }
1389 //              
1390 //              while (!qFile.eof())    {       char c = qFile.get(); if (c == 10 || c == 13){  break;  }       }
1391 //              
1392 //              float score;    
1393 //              float average = 0;
1394 //              
1395 //              for(int i=0;i<seqLength;i++){
1396 //                      qFile >> score;
1397 //                      average += score;
1398 //              }
1399 //              average /= seqLength;
1400 //
1401 //              if(average >= qAverage) {       success = 1;    }
1402 //              else                                    {       success = 0;    }
1403 //              
1404 //              return success;
1405 //      }
1406 //      catch(exception& e) {
1407 //              m->errorOut(e, "TrimSeqsCommand", "cullQualAverage");
1408 //              exit(1);
1409 //      }
1410 //}
1411
1412 //***************************************************************************************************************