]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
pat's changes to seq.error command
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::getValidParameters(){   
15         try {
16                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
17                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
18                                                                         "keepfirst", "removelast",
19                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
20                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
21                 return myArray;
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
25                 exit(1);
26         }
27 }
28 //**********************************************************************************************************************
29 TrimSeqsCommand::TrimSeqsCommand(){     
30         try {
31                 abort = true;
32                 //initialize outputTypes
33                 vector<string> tempOutNames;
34                 outputTypes["fasta"] = tempOutNames;
35                 outputTypes["qual"] = tempOutNames;
36                 outputTypes["group"] = tempOutNames;
37         }
38         catch(exception& e) {
39                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
40                 exit(1);
41         }
42 }
43 //**********************************************************************************************************************
44 vector<string> TrimSeqsCommand::getRequiredParameters(){        
45         try {
46                 string Array[] =  {"fasta"};
47                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 vector<string> TrimSeqsCommand::getRequiredFiles(){     
57         try {
58                 vector<string> myArray;
59                 return myArray;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
63                 exit(1);
64         }
65 }
66 //***************************************************************************************************************
67
68 TrimSeqsCommand::TrimSeqsCommand(string option)  {
69         try {
70                 
71                 abort = false;
72                 comboStarts = 0;
73                 
74                 //allow user to run help
75                 if(option == "help") { help(); abort = true; }
76                 
77                 else {
78                         //valid paramters for this command
79                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
80                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
81                                                                 "keepfirst", "removelast",
82                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
83                         
84                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
85                         
86                         OptionParser parser(option);
87                         map<string,string> parameters = parser.getParameters();
88                         
89                         ValidParameters validParameter;
90                         map<string,string>::iterator it;
91                         
92                         //check to make sure all parameters are valid for command
93                         for (it = parameters.begin(); it != parameters.end(); it++) { 
94                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
95                         }
96                         
97                         //initialize outputTypes
98                         vector<string> tempOutNames;
99                         outputTypes["fasta"] = tempOutNames;
100                         outputTypes["qual"] = tempOutNames;
101                         outputTypes["group"] = tempOutNames;
102                         
103                         //if the user changes the input directory command factory will send this info to us in the output parameter 
104                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
105                         if (inputDir == "not found"){   inputDir = "";          }
106                         else {
107                                 string path;
108                                 it = parameters.find("fasta");
109                                 //user has given a template file
110                                 if(it != parameters.end()){ 
111                                         path = m->hasPath(it->second);
112                                         //if the user has not given a path then, add inputdir. else leave path alone.
113                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
114                                 }
115                                 
116                                 it = parameters.find("oligos");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
122                                 }
123                                 
124                                 it = parameters.find("qfile");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
130                                 }
131                                 
132                                 it = parameters.find("group");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
138                                 }
139                         }
140
141                         
142                         //check for required parameters
143                         fastaFile = validParameter.validFile(parameters, "fasta", true);
144                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
145                         else if (fastaFile == "not open") { abort = true; }     
146                         
147                         //if the user changes the output directory command factory will send this info to us in the output parameter 
148                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
149                                 outputDir = ""; 
150                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
151                         }
152                 
153                         
154                         //check for optional parameter and set defaults
155                         // ...at some point should added some additional type checking...
156                         string temp;
157                         temp = validParameter.validFile(parameters, "flip", false);
158                         if (temp == "not found"){       flip = 0;       }
159                         else if(m->isTrue(temp))        {       flip = 1;       }
160                 
161                         temp = validParameter.validFile(parameters, "oligos", true);
162                         if (temp == "not found"){       oligoFile = "";         }
163                         else if(temp == "not open"){    abort = true;   } 
164                         else                                    {       oligoFile = temp;               }
165                         
166                         temp = validParameter.validFile(parameters, "group", true);
167                         if (temp == "not found"){       groupfile = "";         }
168                         else if(temp == "not open"){    abort = true;   } 
169                         else                                    {       groupfile = temp;               }
170                         
171                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
172                         convert(temp, maxAmbig);  
173
174                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
175                         convert(temp, maxHomoP);  
176
177                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
178                         convert(temp, minLength); 
179                         
180                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
181                         convert(temp, maxLength);
182                         
183                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
184                         convert(temp, bdiffs);
185                         
186                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
187                         convert(temp, pdiffs);
188                         
189                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
190                         convert(temp, tdiffs);
191                         
192                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
193                         
194                         temp = validParameter.validFile(parameters, "qfile", true);     
195                         if (temp == "not found")        {       qFileName = "";         }
196                         else if(temp == "not open")     {       abort = true;           }
197                         else                                            {       qFileName = temp;       }
198                         
199                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
200                         convert(temp, qThreshold);
201                         
202                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
203                         qtrim = m->isTrue(temp);
204
205                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
206                         convert(temp, qRollAverage);
207
208                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
209                         convert(temp, qWindowAverage);
210
211                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
212                         convert(temp, qWindowSize);
213
214                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
215                         convert(temp, qWindowStep);
216
217                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
218                         convert(temp, qAverage);
219
220                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
221                         convert(temp, keepFirst);
222
223                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
224                         convert(temp, removeLast);
225                         
226                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
227                         allFiles = m->isTrue(temp);
228                         
229                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
230                         convert(temp, processors); 
231                         
232                         if ((oligoFile != "") && (groupfile != "")) {
233                                 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
234                         }
235                                                                                                 
236                         
237                         if(allFiles && (oligoFile == "") && (groupfile == "")){
238                                 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file.  Ignoring the allfiles request."); m->mothurOutEndLine();
239                         }
240                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
241                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
242                                 qAverage=0;
243                                 qThreshold=0;
244                         }
245                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
246                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
247                                 abort = true;
248                         }
249                 }
250
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
254                 exit(1);
255         }
256 }
257 //**********************************************************************************************************************
258
259 void TrimSeqsCommand::help(){
260         try {
261                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
262                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
263                 m->mothurOut("The fasta parameter is required.\n");
264                 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
265                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
266                 m->mothurOut("The oligos parameter .... The default is "".\n");
267                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
268                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
269                 m->mothurOut("The minlength parameter .... The default is 0.\n");
270                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
271                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
272                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
273                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
274                 m->mothurOut("The qfile parameter .....\n");
275                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
276                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
277                 m->mothurOut("The allfiles parameter .... The default is F.\n");
278                 m->mothurOut("The qtrim parameter .... The default is F.\n");
279                 m->mothurOut("The trim.seqs command should be in the following format: \n");
280                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
281                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
282                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
283                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
284                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
285
286         }
287         catch(exception& e) {
288                 m->errorOut(e, "TrimSeqsCommand", "help");
289                 exit(1);
290         }
291 }
292
293
294 //***************************************************************************************************************
295
296 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
297
298 //***************************************************************************************************************
299
300 int TrimSeqsCommand::execute(){
301         try{
302         
303                 if (abort == true) { return 0; }
304                 
305                 numFPrimers = 0;  //this needs to be initialized
306                 numRPrimers = 0;
307                 vector<string> fastaFileNames;
308                 vector<string> qualFileNames;
309                 
310                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
311                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
312                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
313                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
314                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
315                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
316                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
317                 string groupFile = "";
318                 if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
319                 else{
320                         groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
321                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
322                         groupMap = new GroupMap(groupfile);
323                         groupMap->readMap();
324                         
325                         if(allFiles){
326                                 for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
327                                         groupToIndex[groupMap->namesOfGroups[i]] = i;
328                                         groupVector.push_back(groupMap->namesOfGroups[i]);
329                                         fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) +  groupMap->namesOfGroups[i] + ".fasta"));
330                                         
331                                         //we append later, so we want to clear file
332                                         ofstream outRemove;
333                                         m->openOutputFile(fastaFileNames[i], outRemove);
334                                         outRemove.close();
335                                         if(qFileName != ""){
336                                                 qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) +  groupMap->namesOfGroups[i] + ".qual"));
337                                                 ofstream outRemove2;
338                                                 m->openOutputFile(qualFileNames[i], outRemove2);
339                                                 outRemove2.close();
340                                         }
341                                 }
342                         }
343                         comboStarts = fastaFileNames.size()-1;
344                 }
345                 
346                 if(oligoFile != ""){
347                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
348                         getOligos(fastaFileNames, qualFileNames);
349                 }
350
351                 vector<unsigned long int> fastaFilePos;
352                 vector<unsigned long int> qFilePos;
353                 
354                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
355                 
356                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
357                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
358                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
359                 }       
360                 if(qFileName == "")     {       qLines = lines; } //files with duds
361                 
362                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
363                                 if(processors == 1){
364                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
365                                 }else{
366                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
367                                 }       
368                 #else
369                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
370                 #endif
371                 
372                 if (m->control_pressed) {  return 0; }                  
373                         
374                 set<string> blanks;
375                 for(int i=0;i<fastaFileNames.size();i++){
376                         if (m->isBlank(fastaFileNames[i])) {   blanks.insert(fastaFileNames[i]);        }
377                         else if (filesToRemove.count(fastaFileNames[i]) > 0) {  remove(fastaFileNames[i].c_str()); }
378                         else {
379                                 ifstream inFASTA;
380                                 string seqName;
381                                 m->openInputFile(fastaFileNames[i], inFASTA);
382                                 ofstream outGroups;
383                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
384                                 
385                                 //if the fastafile is on the blanks list then the groups file should be as well
386                                 if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); }
387                                 
388                                 m->openOutputFile(outGroupFilename, outGroups);
389                                 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
390                                 
391                                 string thisGroup = "";
392                                 if (i > comboStarts) {
393                                         map<string, int>::iterator itCombo;
394                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
395                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
396                                         }
397                                 }else{ thisGroup = groupVector[i]; }
398                                         
399                                 while(!inFASTA.eof()){
400                                         if(inFASTA.get() == '>'){
401                                                 inFASTA >> seqName;
402                                                 outGroups << seqName << '\t' << thisGroup << endl;
403                                         }
404                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
405                                 }
406                                 outGroups.close();
407                                 inFASTA.close();
408                         }
409                 }
410                 
411                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
412                 
413                 blanks.clear();
414                 if(qFileName != ""){
415                         for(int i=0;i<qualFileNames.size();i++){
416                                 if (m->isBlank(qualFileNames[i])) {  blanks.insert(qualFileNames[i]);   }
417                                 else if (filesToRemove.count(qualFileNames[i]) > 0) {  remove(qualFileNames[i].c_str()); }
418                         }
419                 }
420                 
421                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
422                 
423                 if (m->control_pressed) { 
424                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
425                         return 0;
426                 }
427
428                 m->mothurOutEndLine();
429                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
430                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
431                 m->mothurOutEndLine();
432                 
433                 return 0;       
434                         
435         }
436         catch(exception& e) {
437                 m->errorOut(e, "TrimSeqsCommand", "execute");
438                 exit(1);
439         }
440 }
441                 
442 /**************************************************************************************/
443
444 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
445                 
446         try {
447                 
448                 ofstream outFASTA;
449                 int able = m->openOutputFile(trimFile, outFASTA);
450                 
451                 ofstream scrapFASTA;
452                 m->openOutputFile(scrapFile, scrapFASTA);
453                 
454                 ofstream outQual;
455                 ofstream scrapQual;
456                 if(qFileName != ""){
457                         m->openOutputFile(trimQFile, outQual);
458                         m->openOutputFile(scrapQFile, scrapQual);
459                 }
460                 
461                 ofstream outGroups;
462                 
463                 if (oligoFile != "") {          
464                         m->openOutputFile(groupFile, outGroups);   
465                 }
466                 
467                 ifstream inFASTA;
468                 m->openInputFile(filename, inFASTA);
469                 inFASTA.seekg(line->start);
470                 
471                 ifstream qFile;
472                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
473                 
474                 
475                 for (int i = 0; i < fastaNames.size(); i++) { //clears old file
476                         ofstream temp;
477                         m->openOutputFile(fastaNames[i], temp);
478                         temp.close();
479                 }
480                 for (int i = 0; i < qualNames.size(); i++) { //clears old file
481                         ofstream temp;
482                         m->openOutputFile(qualNames[i], temp);
483                         temp.close();
484                 }
485                 
486                         
487                 bool done = false;
488                 int count = 0;
489         
490                 while (!done) {
491                                 
492                         if (m->control_pressed) { 
493                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
494                                 if (oligoFile != "") {   outGroups.close();   }
495
496                                 if(qFileName != ""){
497                                         qFile.close();
498                                 }
499                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
500
501                                 return 0;
502                         }
503                         
504                         int success = 1;
505                         
506
507                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
508
509                         QualityScores currQual;
510                         if(qFileName != ""){
511                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
512                         }
513                         
514                         string origSeq = currSeq.getUnaligned();
515                         if (origSeq != "") {
516                                 int groupBar, groupPrime;
517                                 string trashCode = "";
518                                 int currentSeqsDiffs = 0;
519
520                                 if(barcodes.size() != 0){
521                                         success = stripBarcode(currSeq, currQual, groupBar);
522                                         if(success > bdiffs)            {       trashCode += 'b';       }
523                                         else{ currentSeqsDiffs += success;  }
524                                 }
525                                 
526                                 if(numFPrimers != 0){
527                                         success = stripForward(currSeq, currQual, groupPrime);
528                                         if(success > pdiffs)            {       trashCode += 'f';       }
529                                         else{ currentSeqsDiffs += success;  }
530                                 }
531                                 
532                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
533                                 
534                                 if(numRPrimers != 0){
535                                         success = stripReverse(currSeq, currQual);
536                                         if(!success)                            {       trashCode += 'r';       }
537                                 }
538
539                                 if(keepFirst != 0){
540                                         success = keepFirstTrim(currSeq, currQual);
541                                 }
542                                 
543                                 if(removeLast != 0){
544                                         success = removeLastTrim(currSeq, currQual);
545                                         if(!success)                            {       trashCode += 'l';       }
546                                 }
547
548                                 
549                                 if(qFileName != ""){
550                                         
551                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
552                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
553                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
554                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
555                                         else                                            {       success = 1;                            }
556
557 //                                      if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
558 //                                              success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
559 //                                      }
560                                         
561                                         if(!success)                            {       trashCode += 'q';       }
562                                 }                               
563                 
564                                 if(minLength > 0 || maxLength > 0){
565                                         success = cullLength(currSeq);
566                                         if(!success)                            {       trashCode += 'l';       }
567                                 }
568                                 if(maxHomoP > 0){
569                                         success = cullHomoP(currSeq);
570                                         if(!success)                            {       trashCode += 'h';       }
571                                 }
572                                 if(maxAmbig != -1){
573                                         success = cullAmbigs(currSeq);
574                                         if(!success)                            {       trashCode += 'n';       }
575                                 }
576                                 
577                                 if(flip){               // should go last                       
578                                         currSeq.reverseComplement();
579                                         currQual.flipQScores(); 
580                                 }
581                                 
582                                 if(trashCode.length() == 0){
583                                         currSeq.setAligned(currSeq.getUnaligned());
584                                         currSeq.printSequence(outFASTA);
585                                         currQual.printQScores(outQual);
586                                         
587                                         if(barcodes.size() != 0){
588                                                 string thisGroup = groupVector[groupBar];
589                                                 int indexToFastaFile = groupBar;
590                                                 if (primers.size() != 0){
591                                                         //does this primer have a group
592                                                         if (groupVector[groupPrime] != "") {  
593                                                                 thisGroup += "." + groupVector[groupPrime]; 
594                                                                 indexToFastaFile = combos[thisGroup];
595                                                         }
596                                                 }
597                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
598                                                 if(allFiles){
599                                                         ofstream outTemp;
600                                                         m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
601                                                         //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
602                                                         currSeq.printSequence(outTemp);
603                                                         outTemp.close();
604                                                         
605                                                         if(qFileName != ""){
606                                                                 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
607                                                                 ofstream outTemp2;
608                                                                 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
609                                                                 currQual.printQScores(outTemp2);
610                                                                 outTemp2.close();                                                       
611                                                         }
612                                                 }
613                                         }
614                                         
615                                         if (groupfile != "") {
616                                                 string thisGroup = groupMap->getGroup(currSeq.getName());
617                                                 
618                                                 if (thisGroup != "not found") {
619                                                         outGroups << currSeq.getName() << '\t' << thisGroup << endl;
620                                                         if (allFiles) {
621                                                                 ofstream outTemp;
622                                                                 m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
623                                                                 currSeq.printSequence(outTemp);
624                                                                 outTemp.close();
625                                                                 if(qFileName != ""){
626                                                                         ofstream outTemp2;
627                                                                         m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
628                                                                         currQual.printQScores(outTemp2);
629                                                                         outTemp2.close();                                                       
630                                                                 }
631                                                         }
632                                                 }else{
633                                                         m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
634                                                         outGroups << currSeq.getName() << '\t' << "XXX" << endl;
635                                                         if (allFiles) {  
636                                                                 m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
637                                                         }
638                                                 }
639                                         }
640                                 }
641                                 else{
642                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
643                                         currSeq.setUnaligned(origSeq);
644                                         currSeq.setAligned(origSeq);
645                                         currSeq.printSequence(scrapFASTA);
646                                         currQual.printQScores(scrapQual);
647                                 }
648                                 count++;
649                         }
650                         
651                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
652                                 unsigned long int pos = inFASTA.tellg();
653                                 if ((pos == -1) || (pos >= line->end)) { break; }
654                         #else
655                                 if (inFASTA.eof()) { break; }
656                         #endif
657                                 
658                         //report progress
659                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
660                         
661                 }
662                 //report progress
663                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
664
665                 
666                 inFASTA.close();
667                 outFASTA.close();
668                 scrapFASTA.close();
669                 if (oligoFile != "") {   outGroups.close();   }
670                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
671                 
672                 return count;
673         }
674         catch(exception& e) {
675                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
676                 exit(1);
677         }
678 }
679
680 /**************************************************************************************************/
681
682 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
683         try {
684 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
685                 int process = 1;
686                 int exitCommand = 1;
687                 processIDS.clear();
688                 
689                 //loop through and create all the processes you want
690                 while (process != processors) {
691                         int pid = fork();
692                         
693                         if (pid > 0) {
694                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
695                                 process++;
696                         }else if (pid == 0){
697                                 for (int i = 0; i < fastaNames.size(); i++) {
698                                         fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
699                                         //clear old file if it exists
700                                         ofstream temp;
701                                         m->openOutputFile(fastaNames[i], temp);
702                                         temp.close();
703                                         if(qFileName != ""){
704                                                 qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
705                                                 //clear old file if it exists
706                                                 ofstream temp2;
707                                                 m->openOutputFile(qualNames[i], temp2);
708                                                 temp2.close();
709                                         }
710                                 }
711                                 
712                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
713                                 exit(0);
714                         }else { 
715                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
716                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
717                                 exit(0);
718                         }
719                 }
720                 
721                 //parent do my part
722                 for (int i = 0; i < fastaNames.size(); i++) {
723                         //clear old file if it exists
724                         ofstream temp;
725                         m->openOutputFile(fastaNames[i], temp);
726                         temp.close();
727                         if(qFileName != ""){
728                                 //clear old file if it exists
729                                 ofstream temp2;
730                                 m->openOutputFile(qualNames[i], temp2);
731                                 temp2.close();
732                         }
733                 }
734                 
735                 driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]);
736                 
737                 
738                 //force parent to wait until all the processes are done
739                 for (int i=0;i<processIDS.size();i++) { 
740                         int temp = processIDS[i];
741                         wait(&temp);
742                 }
743                 
744                 //append files
745                 for(int i=0;i<processIDS.size();i++){
746                         m->mothurOut("Appending files from process " + processIDS[i]); m->mothurOutEndLine();
747                         
748                         m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile);
749                         remove((trimFile + toString(processIDS[i]) + ".temp").c_str());
750                         m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile);
751                         remove((scrapFile + toString(processIDS[i]) + ".temp").c_str());
752                         
753                         m->mothurOut("Done with fasta files"); m->mothurOutEndLine();
754                         
755                         if(qFileName != ""){
756                                 m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile);
757                                 remove((trimQFile + toString(processIDS[i]) + ".temp").c_str());
758                                 m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile);
759                                 remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str());
760                         
761                                 m->mothurOut("Done with quality files"); m->mothurOutEndLine();
762                         }
763                         
764                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
765                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
766                         
767                         m->mothurOut("Done with group file"); m->mothurOutEndLine();
768                         
769                         for (int j = 0; j < fastaNames.size(); j++) {
770                                 m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]);
771                                 remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str());
772                         }
773                         
774                         if(qFileName != ""){
775                                 for (int j = 0; j < qualNames.size(); j++) {
776                                         m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]);
777                                         remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str());
778                                 }
779                         }
780                         
781                         if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); }
782                 }
783         
784                 return exitCommand;
785 #endif          
786         }
787         catch(exception& e) {
788                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
789                 exit(1);
790         }
791 }
792
793 /**************************************************************************************************/
794
795 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
796         try {
797                 
798                 //set file positions for fasta file
799                 fastaFilePos = m->divideFile(filename, processors);
800                 
801                 if (qfilename == "") { return processors; }
802                 
803                 //get name of first sequence in each chunk
804                 map<string, int> firstSeqNames;
805                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
806                         ifstream in;
807                         m->openInputFile(filename, in);
808                         in.seekg(fastaFilePos[i]);
809                 
810                         Sequence temp(in); 
811                         firstSeqNames[temp.getName()] = i;
812                 
813                         in.close();
814                 }
815                                 
816                 //seach for filePos of each first name in the qfile and save in qfileFilePos
817                 ifstream inQual;
818                 m->openInputFile(qfilename, inQual);
819                 
820                 string input;
821                 while(!inQual.eof()){   
822                         input = m->getline(inQual);
823
824                         if (input.length() != 0) {
825                                 if(input[0] == '>'){ //this is a sequence name line
826                                         istringstream nameStream(input);
827                                         
828                                         string sname = "";  nameStream >> sname;
829                                         sname = sname.substr(1);
830                                         
831                                         map<string, int>::iterator it = firstSeqNames.find(sname);
832                                         
833                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
834                                                 unsigned long int pos = inQual.tellg(); 
835                                                 qfileFilePos.push_back(pos - input.length() - 1);       
836                                                 firstSeqNames.erase(it);
837                                         }
838                                 }
839                         }
840                         
841                         if (firstSeqNames.size() == 0) { break; }
842                 }
843                 inQual.close();
844                 
845                 
846                 if (firstSeqNames.size() != 0) { 
847                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
848                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
849                         }
850                         qFileName = "";
851                         return processors;
852                 }
853
854                 //get last file position of qfile
855                 FILE * pFile;
856                 unsigned long int size;
857                 
858                 //get num bytes in file
859                 pFile = fopen (qfilename.c_str(),"rb");
860                 if (pFile==NULL) perror ("Error opening file");
861                 else{
862                         fseek (pFile, 0, SEEK_END);
863                         size=ftell (pFile);
864                         fclose (pFile);
865                 }
866                 
867                 qfileFilePos.push_back(size);
868                 
869                 return processors;
870         }
871         catch(exception& e) {
872                 m->errorOut(e, "TrimSeqsCommand", "setLines");
873                 exit(1);
874         }
875 }
876 //***************************************************************************************************************
877
878 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
879         try {
880                 ifstream inOligos;
881                 m->openInputFile(oligoFile, inOligos);
882                 
883                 ofstream test;
884                 
885                 string type, oligo, group;
886                 int index=0;
887                 //int indexPrimer = 0;
888                 
889                 while(!inOligos.eof()){
890                         inOligos >> type; m->gobble(inOligos);
891                                         
892                         if(type[0] == '#'){
893                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
894                         }
895                         else{
896                                 //make type case insensitive
897                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
898                                 
899                                 inOligos >> oligo;
900                                 
901                                 for(int i=0;i<oligo.length();i++){
902                                         oligo[i] = toupper(oligo[i]);
903                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
904                                 }
905                                 
906                                 if(type == "FORWARD"){
907                                         group = "";
908                                         
909                                         // get rest of line in case there is a primer name
910                                         while (!inOligos.eof()) {       
911                                                 char c = inOligos.get(); 
912                                                 if (c == 10 || c == 13){        break;  }
913                                                 else if (c == 32 || c == 9){;} //space or tab
914                                                 else {  group += c;  }
915                                         } 
916                                         
917                                         //check for repeat barcodes
918                                         map<string, int>::iterator itPrime = primers.find(oligo);
919                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
920                                         
921                                                 primers[oligo]=index; index++;
922                                                 groupVector.push_back(group);
923                                         
924                                                 if(allFiles){
925                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
926                                                         if(qFileName != ""){
927                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
928                                                         }
929                                                         if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
930                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
931                                                                 if(qFileName != ""){
932                                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
933                                                                 }
934                                                         }else {
935                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
936                                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
937                                                                 if(qFileName != ""){
938                                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
939                                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
940                                                                 }                                                       
941                                                         }
942                                                 }
943                                         
944                                 }
945                                 else if(type == "REVERSE"){
946                                         Sequence oligoRC("reverse", oligo);
947                                         oligoRC.reverseComplement();
948                                         revPrimer.push_back(oligoRC.getUnaligned());
949                                 }
950                                 else if(type == "BARCODE"){
951                                         inOligos >> group;
952                                         
953                                         //check for repeat barcodes
954                                         map<string, int>::iterator itBar = barcodes.find(oligo);
955                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
956                                         
957                                                 barcodes[oligo]=index; index++;
958                                                 groupVector.push_back(group);
959                                                 
960                                                 if(allFiles){
961                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
962                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
963                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
964                                                         if(qFileName != ""){
965                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
966                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
967                                                                 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
968                                                         }                                                       
969                                                 }
970                                         
971                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
972                         }
973                         m->gobble(inOligos);
974                 }
975                 
976                 inOligos.close();
977                 
978                 //add in potential combos
979                 if(allFiles){
980                         comboStarts = outFASTAVec.size()-1;
981                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
982                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
983                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
984                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
985                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
986                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
987                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
988                                                 
989                                                 if(qFileName != ""){
990                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
991                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
992                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
993                                                 }
994                                         }
995                                 }
996                         }
997                 }
998                 
999                 numFPrimers = primers.size();
1000                 numRPrimers = revPrimer.size();
1001                 
1002         }
1003         catch(exception& e) {
1004                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1005                 exit(1);
1006         }
1007 }
1008 //***************************************************************************************************************
1009
1010 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1011         try {
1012                 
1013                 string rawSequence = seq.getUnaligned();
1014                 int success = bdiffs + 1;       //guilty until proven innocent
1015                 
1016                 //can you find the barcode
1017                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1018                         string oligo = it->first;
1019                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1020                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1021                                 break;  
1022                         }
1023                         
1024                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1025                                 group = it->second;
1026                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1027                                 
1028                                 if(qual.getName() != ""){
1029                                         qual.trimQScores(oligo.length(), -1);
1030                                 }
1031                                 
1032                                 success = 0;
1033                                 break;
1034                         }
1035                 }
1036                 
1037                 //if you found the barcode or if you don't want to allow for diffs
1038 //              cout << success;
1039                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1040                 
1041                 else { //try aligning and see if you can find it
1042 //                      cout << endl;
1043
1044                         int maxLength = 0;
1045
1046                         Alignment* alignment;
1047                         if (barcodes.size() > 0) {
1048                                 map<string,int>::iterator it=barcodes.begin();
1049
1050                                 for(it;it!=barcodes.end();it++){
1051                                         if(it->first.length() > maxLength){
1052                                                 maxLength = it->first.length();
1053                                         }
1054                                 }
1055                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1056
1057                         }else{ alignment = NULL; } 
1058                         
1059                         //can you find the barcode
1060                         int minDiff = 1e6;
1061                         int minCount = 1;
1062                         int minGroup = -1;
1063                         int minPos = 0;
1064                         
1065                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1066                                 string oligo = it->first;
1067 //                              int length = oligo.length();
1068                                 
1069                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1070                                         success = bdiffs + 10;
1071                                         break;
1072                                 }
1073                                 
1074                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1075                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1076                                 oligo = alignment->getSeqAAln();
1077                                 string temp = alignment->getSeqBAln();
1078                 
1079                                 int alnLength = oligo.length();
1080                                 
1081                                 for(int i=oligo.length()-1;i>=0;i--){
1082                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1083                                 }
1084                                 oligo = oligo.substr(0,alnLength);
1085                                 temp = temp.substr(0,alnLength);
1086                                 
1087                                 int newStart=0;
1088                                 int numDiff = countDiffs(oligo, temp);
1089                                 
1090 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1091                                 
1092                                 if(numDiff < minDiff){
1093                                         minDiff = numDiff;
1094                                         minCount = 1;
1095                                         minGroup = it->second;
1096                                         minPos = 0;
1097                                         for(int i=0;i<alnLength;i++){
1098                                                 if(temp[i] != '-'){
1099                                                         minPos++;
1100                                                 }
1101                                         }
1102                                 }
1103                                 else if(numDiff == minDiff){
1104                                         minCount++;
1105                                 }
1106
1107                         }
1108
1109                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1110                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1111                         else{                                                                                                   //use the best match
1112                                 group = minGroup;
1113                                 seq.setUnaligned(rawSequence.substr(minPos));
1114                                 
1115                                 if(qual.getName() != ""){
1116                                         qual.trimQScores(minPos, -1);
1117                                 }
1118                                 success = minDiff;
1119                         }
1120                         
1121                         if (alignment != NULL) {  delete alignment;  }
1122                         
1123                 }
1124 //              cout << success << endl;
1125                 
1126                 return success;
1127                 
1128         }
1129         catch(exception& e) {
1130                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1131                 exit(1);
1132         }
1133
1134 }
1135
1136 //***************************************************************************************************************
1137
1138 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1139         try {
1140                 string rawSequence = seq.getUnaligned();
1141                 int success = pdiffs + 1;       //guilty until proven innocent
1142                 
1143                 //can you find the primer
1144                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1145                         string oligo = it->first;
1146                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1147                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1148                                 break;  
1149                         }
1150                         
1151                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1152                                 group = it->second;
1153                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1154                                 if(qual.getName() != ""){
1155                                         qual.trimQScores(oligo.length(), -1);
1156                                 }
1157                                 success = 0;
1158                                 break;
1159                         }
1160                 }
1161
1162                 //if you found the barcode or if you don't want to allow for diffs
1163 //              cout << success;
1164                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1165                 
1166                 else { //try aligning and see if you can find it
1167 //                      cout << endl;
1168
1169                         int maxLength = 0;
1170
1171                         Alignment* alignment;
1172                         if (primers.size() > 0) {
1173                                 map<string,int>::iterator it=primers.begin();
1174
1175                                 for(it;it!=primers.end();it++){
1176                                         if(it->first.length() > maxLength){
1177                                                 maxLength = it->first.length();
1178                                         }
1179                                 }
1180                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1181
1182                         }else{ alignment = NULL; } 
1183                         
1184                         //can you find the barcode
1185                         int minDiff = 1e6;
1186                         int minCount = 1;
1187                         int minGroup = -1;
1188                         int minPos = 0;
1189                         
1190                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1191                                 string oligo = it->first;
1192 //                              int length = oligo.length();
1193                                 
1194                                 if(rawSequence.length() < maxLength){   
1195                                         success = pdiffs + 100;
1196                                         break;
1197                                 }
1198                                 
1199                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1200                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1201                                 oligo = alignment->getSeqAAln();
1202                                 string temp = alignment->getSeqBAln();
1203                 
1204                                 int alnLength = oligo.length();
1205                                 
1206                                 for(int i=oligo.length()-1;i>=0;i--){
1207                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1208                                 }
1209                                 oligo = oligo.substr(0,alnLength);
1210                                 temp = temp.substr(0,alnLength);
1211                                 
1212                                 int newStart=0;
1213                                 int numDiff = countDiffs(oligo, temp);
1214                                 
1215 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1216                                 
1217                                 if(numDiff < minDiff){
1218                                         minDiff = numDiff;
1219                                         minCount = 1;
1220                                         minGroup = it->second;
1221                                         minPos = 0;
1222                                         for(int i=0;i<alnLength;i++){
1223                                                 if(temp[i] != '-'){
1224                                                         minPos++;
1225                                                 }
1226                                         }
1227                                 }
1228                                 else if(numDiff == minDiff){
1229                                         minCount++;
1230                                 }
1231
1232                         }
1233
1234                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1235                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1236                         else{                                                                                                   //use the best match
1237                                 group = minGroup;
1238                                 seq.setUnaligned(rawSequence.substr(minPos));
1239                                 if(qual.getName() != ""){
1240                                         qual.trimQScores(minPos, -1);
1241                                 }
1242                                 success = minDiff;
1243                         }
1244                         
1245                         if (alignment != NULL) {  delete alignment;  }
1246                         
1247                 }
1248                 
1249                 return success;
1250
1251         }
1252         catch(exception& e) {
1253                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1254                 exit(1);
1255         }
1256 }
1257
1258 //***************************************************************************************************************
1259
1260 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1261         try {
1262                 string rawSequence = seq.getUnaligned();
1263                 bool success = 0;       //guilty until proven innocent
1264                 
1265                 for(int i=0;i<numRPrimers;i++){
1266                         string oligo = revPrimer[i];
1267                         
1268                         if(rawSequence.length() < oligo.length()){
1269                                 success = 0;
1270                                 break;
1271                         }
1272                         
1273                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1274                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1275                                 if(qual.getName() != ""){
1276                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1277                                 }
1278                                 success = 1;
1279                                 break;
1280                         }
1281                 }       
1282                 return success;
1283                 
1284         }
1285         catch(exception& e) {
1286                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1287                 exit(1);
1288         }
1289 }
1290
1291 //***************************************************************************************************************
1292
1293 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1294         try {
1295                 bool success = 1;
1296                 if(qscores.getName() != ""){
1297                         qscores.trimQScores(-1, keepFirst);
1298                 }
1299                 sequence.trim(keepFirst);
1300                 return success;
1301         }
1302         catch(exception& e) {
1303                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1304                 exit(1);
1305         }
1306         
1307 }       
1308
1309 //***************************************************************************************************************
1310
1311 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1312         try {
1313                 bool success = 0;
1314                 
1315                 int length = sequence.getNumBases() - removeLast;
1316                 
1317                 if(length > 0){
1318                         if(qscores.getName() != ""){
1319                                 qscores.trimQScores(-1, length);
1320                         }
1321                         sequence.trim(length);
1322                         success = 1;
1323                 }
1324                 else{
1325                         success = 0;
1326                 }
1327
1328                 return success;
1329         }
1330         catch(exception& e) {
1331                 m->errorOut(e, "removeLastTrim", "countDiffs");
1332                 exit(1);
1333         }
1334         
1335 }       
1336
1337 //***************************************************************************************************************
1338
1339 bool TrimSeqsCommand::cullLength(Sequence& seq){
1340         try {
1341         
1342                 int length = seq.getNumBases();
1343                 bool success = 0;       //guilty until proven innocent
1344                 
1345                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1346                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1347                 else                                                                                            {       success = 0;    }
1348                 
1349                 return success;
1350         
1351         }
1352         catch(exception& e) {
1353                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1354                 exit(1);
1355         }
1356         
1357 }
1358
1359 //***************************************************************************************************************
1360
1361 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1362         try {
1363                 int longHomoP = seq.getLongHomoPolymer();
1364                 bool success = 0;       //guilty until proven innocent
1365                 
1366                 if(longHomoP <= maxHomoP){      success = 1;    }
1367                 else                                    {       success = 0;    }
1368                 
1369                 return success;
1370         }
1371         catch(exception& e) {
1372                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1373                 exit(1);
1374         }
1375         
1376 }
1377
1378 //***************************************************************************************************************
1379
1380 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1381         try {
1382                 int numNs = seq.getAmbigBases();
1383                 bool success = 0;       //guilty until proven innocent
1384                 
1385                 if(numNs <= maxAmbig)   {       success = 1;    }
1386                 else                                    {       success = 0;    }
1387                 
1388                 return success;
1389         }
1390         catch(exception& e) {
1391                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1392                 exit(1);
1393         }
1394         
1395 }
1396
1397 //***************************************************************************************************************
1398
1399 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1400         try {
1401                 bool success = 1;
1402                 int length = oligo.length();
1403                 
1404                 for(int i=0;i<length;i++){
1405                         
1406                         if(oligo[i] != seq[i]){
1407                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1408                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1409                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1410                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1411                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1412                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1413                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1414                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1415                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1416                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1417                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1418                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1419                                 
1420                                 if(success == 0)        {       break;   }
1421                         }
1422                         else{
1423                                 success = 1;
1424                         }
1425                 }
1426                 
1427                 return success;
1428         }
1429         catch(exception& e) {
1430                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1431                 exit(1);
1432         }
1433
1434 }
1435 //***************************************************************************************************************
1436
1437 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1438         try {
1439
1440                 int length = oligo.length();
1441                 int countDiffs = 0;
1442                 
1443                 for(int i=0;i<length;i++){
1444                                                                 
1445                         if(oligo[i] != seq[i]){
1446                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1447                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1448                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1449                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1450                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1451                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1452                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1453                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1454                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1455                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1456                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1457                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1458                         }
1459                         
1460                 }
1461                 
1462                 return countDiffs;
1463         }
1464         catch(exception& e) {
1465                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1466                 exit(1);
1467         }
1468
1469 }
1470
1471 //***************************************************************************************************************