]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
removed various build warnings
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimSeqsCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
18                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19                                                                         "keepfirst", "removelast",
20                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 TrimSeqsCommand::TrimSeqsCommand(){     
33         try {
34                 abort = true;
35                 //initialize outputTypes
36                 vector<string> tempOutNames;
37                 outputTypes["fasta"] = tempOutNames;
38                 outputTypes["qual"] = tempOutNames;
39                 outputTypes["group"] = tempOutNames;
40         }
41         catch(exception& e) {
42                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
43                 exit(1);
44         }
45 }
46
47 //**********************************************************************************************************************
48
49 vector<string> TrimSeqsCommand::getRequiredParameters(){        
50         try {
51                 string Array[] =  {"fasta"};
52                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
53                 return myArray;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
57                 exit(1);
58         }
59 }
60
61 //**********************************************************************************************************************
62
63 vector<string> TrimSeqsCommand::getRequiredFiles(){     
64         try {
65                 vector<string> myArray;
66                 return myArray;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
70                 exit(1);
71         }
72 }
73
74 //***************************************************************************************************************
75
76 TrimSeqsCommand::TrimSeqsCommand(string option)  {
77         try {
78                 
79                 abort = false;
80                 comboStarts = 0;
81                 
82                 //allow user to run help
83                 if(option == "help") { help(); abort = true; }
84                 
85                 else {
86                         //valid paramters for this command
87                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
88                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
89                                                                 "keepfirst", "removelast",
90                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
91                         
92                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
93                         
94                         OptionParser parser(option);
95                         map<string,string> parameters = parser.getParameters();
96                         
97                         ValidParameters validParameter;
98                         map<string,string>::iterator it;
99                         
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) { 
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         //initialize outputTypes
106                         vector<string> tempOutNames;
107                         outputTypes["fasta"] = tempOutNames;
108                         outputTypes["qual"] = tempOutNames;
109                         outputTypes["group"] = tempOutNames;
110                         
111                         //if the user changes the input directory command factory will send this info to us in the output parameter 
112                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
113                         if (inputDir == "not found"){   inputDir = "";          }
114                         else {
115                                 string path;
116                                 it = parameters.find("fasta");
117                                 //user has given a template file
118                                 if(it != parameters.end()){ 
119                                         path = m->hasPath(it->second);
120                                         //if the user has not given a path then, add inputdir. else leave path alone.
121                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
122                                 }
123                                 
124                                 it = parameters.find("oligos");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
130                                 }
131                                 
132                                 it = parameters.find("qfile");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
138                                 }
139                                 
140                                 it = parameters.find("group");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
146                                 }
147                         }
148
149                         
150                         //check for required parameters
151                         fastaFile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
153                         else if (fastaFile == "not open") { abort = true; }     
154                         
155                         //if the user changes the output directory command factory will send this info to us in the output parameter 
156                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
157                                 outputDir = ""; 
158                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
159                         }
160                 
161                         
162                         //check for optional parameter and set defaults
163                         // ...at some point should added some additional type checking...
164                         string temp;
165                         temp = validParameter.validFile(parameters, "flip", false);
166                         if (temp == "not found"){       flip = 0;       }
167                         else if(m->isTrue(temp))        {       flip = 1;       }
168                 
169                         temp = validParameter.validFile(parameters, "oligos", true);
170                         if (temp == "not found"){       oligoFile = "";         }
171                         else if(temp == "not open"){    abort = true;   } 
172                         else                                    {       oligoFile = temp;               }
173                         
174                         temp = validParameter.validFile(parameters, "group", true);
175                         if (temp == "not found"){       groupfile = "";         }
176                         else if(temp == "not open"){    abort = true;   } 
177                         else                                    {       groupfile = temp;               }
178                         
179                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
180                         convert(temp, maxAmbig);  
181
182                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
183                         convert(temp, maxHomoP);  
184
185                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
186                         convert(temp, minLength); 
187                         
188                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
189                         convert(temp, maxLength);
190                         
191                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
192                         convert(temp, bdiffs);
193                         
194                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
195                         convert(temp, pdiffs);
196                         
197                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
198                         convert(temp, tdiffs);
199                         
200                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
201                         
202                         temp = validParameter.validFile(parameters, "qfile", true);     
203                         if (temp == "not found")        {       qFileName = "";         }
204                         else if(temp == "not open")     {       abort = true;           }
205                         else                                            {       qFileName = temp;       }
206                         
207                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
208                         convert(temp, qThreshold);
209                         
210                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "F"; }
211                         qtrim = m->isTrue(temp);
212
213                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
214                         convert(temp, qRollAverage);
215
216                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
217                         convert(temp, qWindowAverage);
218
219                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
220                         convert(temp, qWindowSize);
221
222                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
223                         convert(temp, qWindowStep);
224
225                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
226                         convert(temp, qAverage);
227
228                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
229                         convert(temp, keepFirst);
230
231                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
232                         convert(temp, removeLast);
233                         
234                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
235                         allFiles = m->isTrue(temp);
236                         
237                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
238                         convert(temp, processors); 
239                         
240                         if ((oligoFile != "") && (groupfile != "")) {
241                                 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
242                         }
243                                                                                                 
244                         
245                         if(allFiles && (oligoFile == "") && (groupfile == "")){
246                                 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file.  Ignoring the allfiles request."); m->mothurOutEndLine();
247                         }
248                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
249                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
250                                 qAverage=0;
251                                 qThreshold=0;
252                         }
253                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
254                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
255                                 abort = true;
256                         }
257                 }
258
259         }
260         catch(exception& e) {
261                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
262                 exit(1);
263         }
264 }
265
266 //**********************************************************************************************************************
267
268 void TrimSeqsCommand::help(){
269         try {
270                 m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
271                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
272                 m->mothurOut("The fasta parameter is required.\n");
273                 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
274                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
275                 m->mothurOut("The oligos parameter .... The default is "".\n");
276                 m->mothurOut("The maxambig parameter .... The default is -1.\n");
277                 m->mothurOut("The maxhomop parameter .... The default is 0.\n");
278                 m->mothurOut("The minlength parameter .... The default is 0.\n");
279                 m->mothurOut("The maxlength parameter .... The default is 0.\n");
280                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
281                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
282                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
283                 m->mothurOut("The qfile parameter .....\n");
284                 m->mothurOut("The qthreshold parameter .... The default is 0.\n");
285                 m->mothurOut("The qaverage parameter .... The default is 0.\n");
286                 m->mothurOut("The allfiles parameter .... The default is F.\n");
287                 m->mothurOut("The qtrim parameter .... The default is F.\n");
288                 m->mothurOut("The trim.seqs command should be in the following format: \n");
289                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
290                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
291                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
292                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
293                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
294
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "help");
298                 exit(1);
299         }
300 }
301
302
303 //***************************************************************************************************************
304
305 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
306
307 //***************************************************************************************************************
308
309 int TrimSeqsCommand::execute(){
310         try{
311         
312                 if (abort == true) { return 0; }
313                 
314                 numFPrimers = 0;  //this needs to be initialized
315                 numRPrimers = 0;
316                 vector<string> fastaFileNames;
317                 vector<string> qualFileNames;
318                 
319                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
320                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
321                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
322                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
323                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
324                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
325                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
326                 string groupFile = "";
327                 if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
328                 else{
329                         groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
330                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
331                         groupMap = new GroupMap(groupfile);
332                         groupMap->readMap();
333                         
334                         if(allFiles){
335                                 for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
336                                         groupToIndex[groupMap->namesOfGroups[i]] = i;
337                                         groupVector.push_back(groupMap->namesOfGroups[i]);
338                                         fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) +  groupMap->namesOfGroups[i] + ".fasta"));
339                                         
340                                         //we append later, so we want to clear file
341                                         ofstream outRemove;
342                                         m->openOutputFile(fastaFileNames[i], outRemove);
343                                         outRemove.close();
344                                         if(qFileName != ""){
345                                                 qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) +  groupMap->namesOfGroups[i] + ".qual"));
346                                                 ofstream outRemove2;
347                                                 m->openOutputFile(qualFileNames[i], outRemove2);
348                                                 outRemove2.close();
349                                         }
350                                 }
351                         }
352                         comboStarts = fastaFileNames.size()-1;
353                 }
354                 
355                 if(oligoFile != ""){
356                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
357                         getOligos(fastaFileNames, qualFileNames);
358                 }
359
360                 vector<unsigned long int> fastaFilePos;
361                 vector<unsigned long int> qFilePos;
362                 
363                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
364                 
365                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
366                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
367                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
368                 }       
369                 if(qFileName == "")     {       qLines = lines; } //files with duds
370                 
371                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
372                                 if(processors == 1){
373                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
374                                 }else{
375                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
376                                 }       
377                 #else
378                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
379                 #endif
380                 
381                 if (m->control_pressed) {  return 0; }                  
382                         
383                 set<string> blanks;
384                 for(int i=0;i<fastaFileNames.size();i++){
385                         if (m->isBlank(fastaFileNames[i])) {   blanks.insert(fastaFileNames[i]);        }
386                         else if (filesToRemove.count(fastaFileNames[i]) > 0) {  remove(fastaFileNames[i].c_str()); }
387                         else {
388                                 ifstream inFASTA;
389                                 string seqName;
390                                 m->openInputFile(fastaFileNames[i], inFASTA);
391                                 ofstream outGroups;
392                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
393                                 
394                                 //if the fastafile is on the blanks list then the groups file should be as well
395                                 if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); }
396                                 
397                                 m->openOutputFile(outGroupFilename, outGroups);
398                                 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
399                                 
400                                 string thisGroup = "";
401                                 if (i > comboStarts) {
402                                         map<string, int>::iterator itCombo;
403                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
404                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
405                                         }
406                                 }else{ thisGroup = groupVector[i]; }
407                                         
408                                 while(!inFASTA.eof()){
409                                         if(inFASTA.get() == '>'){
410                                                 inFASTA >> seqName;
411                                                 outGroups << seqName << '\t' << thisGroup << endl;
412                                         }
413                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
414                                 }
415                                 outGroups.close();
416                                 inFASTA.close();
417                         }
418                 }
419                 
420                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
421                 
422                 blanks.clear();
423                 if(qFileName != ""){
424                         for(int i=0;i<qualFileNames.size();i++){
425                                 if (m->isBlank(qualFileNames[i])) {  blanks.insert(qualFileNames[i]);   }
426                                 else if (filesToRemove.count(qualFileNames[i]) > 0) {  remove(qualFileNames[i].c_str()); }
427                         }
428                 }
429                 
430                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
431                 
432                 if (m->control_pressed) { 
433                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
434                         return 0;
435                 }
436
437                 m->mothurOutEndLine();
438                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
439                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
440                 m->mothurOutEndLine();
441                 
442                 return 0;       
443                         
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "TrimSeqsCommand", "execute");
447                 exit(1);
448         }
449 }
450                 
451 /**************************************************************************************/
452
453 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
454                 
455         try {
456                 
457                 ofstream outFASTA;
458                 m->openOutputFile(trimFile, outFASTA);
459                 
460                 ofstream scrapFASTA;
461                 m->openOutputFile(scrapFile, scrapFASTA);
462                 
463                 ofstream outQual;
464                 ofstream scrapQual;
465                 if(qFileName != ""){
466                         m->openOutputFile(trimQFile, outQual);
467                         m->openOutputFile(scrapQFile, scrapQual);
468                 }
469                 
470                 ofstream outGroups;
471                 
472                 if (oligoFile != "") {          
473                         m->openOutputFile(groupFile, outGroups);   
474                 }
475                 
476                 ifstream inFASTA;
477                 m->openInputFile(filename, inFASTA);
478                 inFASTA.seekg(line->start);
479                 
480                 ifstream qFile;
481                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
482                 
483                 
484                 for (int i = 0; i < fastaNames.size(); i++) { //clears old file
485                         ofstream temp;
486                         m->openOutputFile(fastaNames[i], temp);
487                         temp.close();
488                 }
489                 for (int i = 0; i < qualNames.size(); i++) { //clears old file
490                         ofstream temp;
491                         m->openOutputFile(qualNames[i], temp);
492                         temp.close();
493                 }
494                 
495                         
496                 bool done = false;
497                 int count = 0;
498         
499                 while (!done) {
500                                 
501                         if (m->control_pressed) { 
502                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
503                                 if (oligoFile != "") {   outGroups.close();   }
504
505                                 if(qFileName != ""){
506                                         qFile.close();
507                                 }
508                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
509
510                                 return 0;
511                         }
512                         
513                         int success = 1;
514                         
515
516                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
517
518                         QualityScores currQual;
519                         if(qFileName != ""){
520                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
521                         }
522                         
523                         string origSeq = currSeq.getUnaligned();
524                         if (origSeq != "") {
525                                 int groupBar, groupPrime;
526                                 string trashCode = "";
527                                 int currentSeqsDiffs = 0;
528
529                                 if(barcodes.size() != 0){
530                                         success = stripBarcode(currSeq, currQual, groupBar);
531                                         if(success > bdiffs)            {       trashCode += 'b';       }
532                                         else{ currentSeqsDiffs += success;  }
533                                 }
534                                 
535                                 if(numFPrimers != 0){
536                                         success = stripForward(currSeq, currQual, groupPrime);
537                                         if(success > pdiffs)            {       trashCode += 'f';       }
538                                         else{ currentSeqsDiffs += success;  }
539                                 }
540                                 
541                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
542                                 
543                                 if(numRPrimers != 0){
544                                         success = stripReverse(currSeq, currQual);
545                                         if(!success)                            {       trashCode += 'r';       }
546                                 }
547
548                                 if(keepFirst != 0){
549                                         success = keepFirstTrim(currSeq, currQual);
550                                 }
551                                 
552                                 if(removeLast != 0){
553                                         success = removeLastTrim(currSeq, currQual);
554                                         if(!success)                            {       trashCode += 'l';       }
555                                 }
556
557                                 
558                                 if(qFileName != ""){
559                                         
560                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
561                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
562                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
563                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
564                                         else                                            {       success = 1;                            }
565
566 //                                      if (qtrim == 1 && (origSeq.length() != currSeq.getUnaligned().length())) { 
567 //                                              success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
568 //                                      }
569                                         
570                                         if(!success)                            {       trashCode += 'q';       }
571                                 }                               
572                 
573                                 if(minLength > 0 || maxLength > 0){
574                                         success = cullLength(currSeq);
575                                         if(!success)                            {       trashCode += 'l';       }
576                                 }
577                                 if(maxHomoP > 0){
578                                         success = cullHomoP(currSeq);
579                                         if(!success)                            {       trashCode += 'h';       }
580                                 }
581                                 if(maxAmbig != -1){
582                                         success = cullAmbigs(currSeq);
583                                         if(!success)                            {       trashCode += 'n';       }
584                                 }
585                                 
586                                 if(flip){               // should go last                       
587                                         currSeq.reverseComplement();
588                                         currQual.flipQScores(); 
589                                 }
590                                 
591                                 if(trashCode.length() == 0){
592                                         currSeq.setAligned(currSeq.getUnaligned());
593                                         currSeq.printSequence(outFASTA);
594                                         currQual.printQScores(outQual);
595                                         
596                                         if(barcodes.size() != 0){
597                                                 string thisGroup = groupVector[groupBar];
598                                                 int indexToFastaFile = groupBar;
599                                                 if (primers.size() != 0){
600                                                         //does this primer have a group
601                                                         if (groupVector[groupPrime] != "") {  
602                                                                 thisGroup += "." + groupVector[groupPrime]; 
603                                                                 indexToFastaFile = combos[thisGroup];
604                                                         }
605                                                 }
606                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
607                                                 if(allFiles){
608                                                         ofstream outTemp;
609                                                         m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
610                                                         //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
611                                                         currSeq.printSequence(outTemp);
612                                                         outTemp.close();
613                                                         
614                                                         if(qFileName != ""){
615                                                                 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
616                                                                 ofstream outTemp2;
617                                                                 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
618                                                                 currQual.printQScores(outTemp2);
619                                                                 outTemp2.close();                                                       
620                                                         }
621                                                 }
622                                         }
623                                         
624                                         if (groupfile != "") {
625                                                 string thisGroup = groupMap->getGroup(currSeq.getName());
626                                                 
627                                                 if (thisGroup != "not found") {
628                                                         outGroups << currSeq.getName() << '\t' << thisGroup << endl;
629                                                         if (allFiles) {
630                                                                 ofstream outTemp;
631                                                                 m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
632                                                                 currSeq.printSequence(outTemp);
633                                                                 outTemp.close();
634                                                                 if(qFileName != ""){
635                                                                         ofstream outTemp2;
636                                                                         m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
637                                                                         currQual.printQScores(outTemp2);
638                                                                         outTemp2.close();                                                       
639                                                                 }
640                                                         }
641                                                 }else{
642                                                         m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
643                                                         outGroups << currSeq.getName() << '\t' << "XXX" << endl;
644                                                         if (allFiles) {  
645                                                                 m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
646                                                         }
647                                                 }
648                                         }
649                                 }
650                                 else{
651                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
652                                         currSeq.setUnaligned(origSeq);
653                                         currSeq.setAligned(origSeq);
654                                         currSeq.printSequence(scrapFASTA);
655                                         currQual.printQScores(scrapQual);
656                                 }
657                                 count++;
658                         }
659                         
660                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
661                                 unsigned long int pos = inFASTA.tellg();
662                                 if ((pos == -1) || (pos >= line->end)) { break; }
663                         #else
664                                 if (inFASTA.eof()) { break; }
665                         #endif
666                                 
667                         //report progress
668                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
669                         
670                 }
671                 //report progress
672                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
673
674                 
675                 inFASTA.close();
676                 outFASTA.close();
677                 scrapFASTA.close();
678                 if (oligoFile != "") {   outGroups.close();   }
679                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
680                 
681                 return count;
682         }
683         catch(exception& e) {
684                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
685                 exit(1);
686         }
687 }
688
689 /**************************************************************************************************/
690
691 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
692         try {
693 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
694                 int process = 1;
695                 int exitCommand = 1;
696                 processIDS.clear();
697                 
698                 //loop through and create all the processes you want
699                 while (process != processors) {
700                         int pid = fork();
701                         
702                         if (pid > 0) {
703                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
704                                 process++;
705                         }else if (pid == 0){
706                                 for (int i = 0; i < fastaNames.size(); i++) {
707                                         fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
708                                         //clear old file if it exists
709                                         ofstream temp;
710                                         m->openOutputFile(fastaNames[i], temp);
711                                         temp.close();
712                                         if(qFileName != ""){
713                                                 qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
714                                                 //clear old file if it exists
715                                                 ofstream temp2;
716                                                 m->openOutputFile(qualNames[i], temp2);
717                                                 temp2.close();
718                                         }
719                                 }
720                                 
721                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
722                                 exit(0);
723                         }else { 
724                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
725                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
726                                 exit(0);
727                         }
728                 }
729                 
730                 //parent do my part
731                 for (int i = 0; i < fastaNames.size(); i++) {
732                         //clear old file if it exists
733                         ofstream temp;
734                         m->openOutputFile(fastaNames[i], temp);
735                         temp.close();
736                         if(qFileName != ""){
737                                 //clear old file if it exists
738                                 ofstream temp2;
739                                 m->openOutputFile(qualNames[i], temp2);
740                                 temp2.close();
741                         }
742                 }
743                 
744                 driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]);
745                 
746                 
747                 //force parent to wait until all the processes are done
748                 for (int i=0;i<processIDS.size();i++) { 
749                         int temp = processIDS[i];
750                         wait(&temp);
751                 }
752                 
753                 //append files
754                 for(int i=0;i<processIDS.size();i++){
755                         
756                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
757                         
758                         m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile);
759                         remove((trimFile + toString(processIDS[i]) + ".temp").c_str());
760                         m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile);
761                         remove((scrapFile + toString(processIDS[i]) + ".temp").c_str());
762                         
763                         m->mothurOut("Done with fasta files"); m->mothurOutEndLine();
764                         
765                         if(qFileName != ""){
766                                 m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile);
767                                 remove((trimQFile + toString(processIDS[i]) + ".temp").c_str());
768                                 m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile);
769                                 remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str());
770                         
771                                 m->mothurOut("Done with quality files"); m->mothurOutEndLine();
772                         }
773                         
774                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
775                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
776                         
777                         m->mothurOut("Done with group file"); m->mothurOutEndLine();
778                         
779                         for (int j = 0; j < fastaNames.size(); j++) {
780                                 m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]);
781                                 remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str());
782                         }
783                         
784                         if(qFileName != ""){
785                                 for (int j = 0; j < qualNames.size(); j++) {
786                                         m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]);
787                                         remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str());
788                                 }
789                         }
790                         
791                         if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); }
792                 }
793         
794                 return exitCommand;
795 #endif          
796         }
797         catch(exception& e) {
798                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
799                 exit(1);
800         }
801 }
802
803 /**************************************************************************************************/
804
805 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
806         try {
807                 
808                 //set file positions for fasta file
809                 fastaFilePos = m->divideFile(filename, processors);
810                 
811                 if (qfilename == "") { return processors; }
812                 
813                 //get name of first sequence in each chunk
814                 map<string, int> firstSeqNames;
815                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
816                         ifstream in;
817                         m->openInputFile(filename, in);
818                         in.seekg(fastaFilePos[i]);
819                 
820                         Sequence temp(in); 
821                         firstSeqNames[temp.getName()] = i;
822                 
823                         in.close();
824                 }
825                                 
826                 //seach for filePos of each first name in the qfile and save in qfileFilePos
827                 ifstream inQual;
828                 m->openInputFile(qfilename, inQual);
829                 
830                 string input;
831                 while(!inQual.eof()){   
832                         input = m->getline(inQual);
833
834                         if (input.length() != 0) {
835                                 if(input[0] == '>'){ //this is a sequence name line
836                                         istringstream nameStream(input);
837                                         
838                                         string sname = "";  nameStream >> sname;
839                                         sname = sname.substr(1);
840                                         
841                                         map<string, int>::iterator it = firstSeqNames.find(sname);
842                                         
843                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
844                                                 unsigned long int pos = inQual.tellg(); 
845                                                 qfileFilePos.push_back(pos - input.length() - 1);       
846                                                 firstSeqNames.erase(it);
847                                         }
848                                 }
849                         }
850                         
851                         if (firstSeqNames.size() == 0) { break; }
852                 }
853                 inQual.close();
854                 
855                 
856                 if (firstSeqNames.size() != 0) { 
857                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
858                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
859                         }
860                         qFileName = "";
861                         return processors;
862                 }
863
864                 //get last file position of qfile
865                 FILE * pFile;
866                 unsigned long int size;
867                 
868                 //get num bytes in file
869                 pFile = fopen (qfilename.c_str(),"rb");
870                 if (pFile==NULL) perror ("Error opening file");
871                 else{
872                         fseek (pFile, 0, SEEK_END);
873                         size=ftell (pFile);
874                         fclose (pFile);
875                 }
876                 
877                 qfileFilePos.push_back(size);
878                 
879                 return processors;
880         }
881         catch(exception& e) {
882                 m->errorOut(e, "TrimSeqsCommand", "setLines");
883                 exit(1);
884         }
885 }
886
887 //***************************************************************************************************************
888
889 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
890         try {
891                 ifstream inOligos;
892                 m->openInputFile(oligoFile, inOligos);
893                 
894                 ofstream test;
895                 
896                 string type, oligo, group;
897                 int index=0;
898                 //int indexPrimer = 0;
899                 
900                 while(!inOligos.eof()){
901                         inOligos >> type; m->gobble(inOligos);
902                                         
903                         if(type[0] == '#'){
904                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
905                         }
906                         else{
907                                 //make type case insensitive
908                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
909                                 
910                                 inOligos >> oligo;
911                                 
912                                 for(int i=0;i<oligo.length();i++){
913                                         oligo[i] = toupper(oligo[i]);
914                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
915                                 }
916                                 
917                                 if(type == "FORWARD"){
918                                         group = "";
919                                         
920                                         // get rest of line in case there is a primer name
921                                         while (!inOligos.eof()) {       
922                                                 char c = inOligos.get(); 
923                                                 if (c == 10 || c == 13){        break;  }
924                                                 else if (c == 32 || c == 9){;} //space or tab
925                                                 else {  group += c;  }
926                                         } 
927                                         
928                                         //check for repeat barcodes
929                                         map<string, int>::iterator itPrime = primers.find(oligo);
930                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
931                                         
932                                                 primers[oligo]=index; index++;
933                                                 groupVector.push_back(group);
934                                         
935                                                 if(allFiles){
936                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
937                                                         if(qFileName != ""){
938                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
939                                                         }
940                                                         if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
941                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
942                                                                 if(qFileName != ""){
943                                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
944                                                                 }
945                                                         }else {
946                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
947                                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
948                                                                 if(qFileName != ""){
949                                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
950                                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
951                                                                 }                                                       
952                                                         }
953                                                 }
954                                         
955                                 }
956                                 else if(type == "REVERSE"){
957                                         Sequence oligoRC("reverse", oligo);
958                                         oligoRC.reverseComplement();
959                                         revPrimer.push_back(oligoRC.getUnaligned());
960                                 }
961                                 else if(type == "BARCODE"){
962                                         inOligos >> group;
963                                         
964                                         //check for repeat barcodes
965                                         map<string, int>::iterator itBar = barcodes.find(oligo);
966                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
967                                         
968                                                 barcodes[oligo]=index; index++;
969                                                 groupVector.push_back(group);
970                                                 
971                                                 if(allFiles){
972                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
973                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
974                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
975                                                         if(qFileName != ""){
976                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
977                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
978                                                                 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
979                                                         }                                                       
980                                                 }
981                                         
982                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
983                         }
984                         m->gobble(inOligos);
985                 }
986                 
987                 inOligos.close();
988                 
989                 //add in potential combos
990                 if(allFiles){
991                         comboStarts = outFASTAVec.size()-1;
992                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
993                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
994                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
995                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
996                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
997                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
998                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
999                                                 
1000                                                 if(qFileName != ""){
1001                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1002                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1003                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1004                                                 }
1005                                         }
1006                                 }
1007                         }
1008                 }
1009                 
1010                 numFPrimers = primers.size();
1011                 numRPrimers = revPrimer.size();
1012                 
1013         }
1014         catch(exception& e) {
1015                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1016                 exit(1);
1017         }
1018 }
1019 //***************************************************************************************************************
1020
1021 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1022         try {
1023                 
1024                 string rawSequence = seq.getUnaligned();
1025                 int success = bdiffs + 1;       //guilty until proven innocent
1026                 
1027                 //can you find the barcode
1028                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1029                         string oligo = it->first;
1030                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1031                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1032                                 break;  
1033                         }
1034                         
1035                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1036                                 group = it->second;
1037                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1038                                 
1039                                 if(qual.getName() != ""){
1040                                         qual.trimQScores(oligo.length(), -1);
1041                                 }
1042                                 
1043                                 success = 0;
1044                                 break;
1045                         }
1046                 }
1047                 
1048                 //if you found the barcode or if you don't want to allow for diffs
1049 //              cout << success;
1050                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1051                 
1052                 else { //try aligning and see if you can find it
1053 //                      cout << endl;
1054
1055                         int maxLength = 0;
1056
1057                         Alignment* alignment;
1058                         if (barcodes.size() > 0) {
1059                                 map<string,int>::iterator it=barcodes.begin();
1060
1061                                 for(it;it!=barcodes.end();it++){
1062                                         if(it->first.length() > maxLength){
1063                                                 maxLength = it->first.length();
1064                                         }
1065                                 }
1066                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1067
1068                         }else{ alignment = NULL; } 
1069                         
1070                         //can you find the barcode
1071                         int minDiff = 1e6;
1072                         int minCount = 1;
1073                         int minGroup = -1;
1074                         int minPos = 0;
1075                         
1076                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1077                                 string oligo = it->first;
1078 //                              int length = oligo.length();
1079                                 
1080                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1081                                         success = bdiffs + 10;
1082                                         break;
1083                                 }
1084                                 
1085                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1086                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1087                                 oligo = alignment->getSeqAAln();
1088                                 string temp = alignment->getSeqBAln();
1089                 
1090                                 int alnLength = oligo.length();
1091                                 
1092                                 for(int i=oligo.length()-1;i>=0;i--){
1093                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1094                                 }
1095                                 oligo = oligo.substr(0,alnLength);
1096                                 temp = temp.substr(0,alnLength);
1097                                 
1098                                 int numDiff = countDiffs(oligo, temp);
1099                                 
1100 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1101                                 
1102                                 if(numDiff < minDiff){
1103                                         minDiff = numDiff;
1104                                         minCount = 1;
1105                                         minGroup = it->second;
1106                                         minPos = 0;
1107                                         for(int i=0;i<alnLength;i++){
1108                                                 if(temp[i] != '-'){
1109                                                         minPos++;
1110                                                 }
1111                                         }
1112                                 }
1113                                 else if(numDiff == minDiff){
1114                                         minCount++;
1115                                 }
1116
1117                         }
1118
1119                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1120                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1121                         else{                                                                                                   //use the best match
1122                                 group = minGroup;
1123                                 seq.setUnaligned(rawSequence.substr(minPos));
1124                                 
1125                                 if(qual.getName() != ""){
1126                                         qual.trimQScores(minPos, -1);
1127                                 }
1128                                 success = minDiff;
1129                         }
1130                         
1131                         if (alignment != NULL) {  delete alignment;  }
1132                         
1133                 }
1134 //              cout << success << endl;
1135                 
1136                 return success;
1137                 
1138         }
1139         catch(exception& e) {
1140                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1141                 exit(1);
1142         }
1143
1144 }
1145
1146 //***************************************************************************************************************
1147
1148 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1149         try {
1150                 string rawSequence = seq.getUnaligned();
1151                 int success = pdiffs + 1;       //guilty until proven innocent
1152                 
1153                 //can you find the primer
1154                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1155                         string oligo = it->first;
1156                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1157                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1158                                 break;  
1159                         }
1160                         
1161                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1162                                 group = it->second;
1163                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1164                                 if(qual.getName() != ""){
1165                                         qual.trimQScores(oligo.length(), -1);
1166                                 }
1167                                 success = 0;
1168                                 break;
1169                         }
1170                 }
1171
1172                 //if you found the barcode or if you don't want to allow for diffs
1173 //              cout << success;
1174                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1175                 
1176                 else { //try aligning and see if you can find it
1177 //                      cout << endl;
1178
1179                         int maxLength = 0;
1180
1181                         Alignment* alignment;
1182                         if (primers.size() > 0) {
1183                                 map<string,int>::iterator it=primers.begin();
1184
1185                                 for(it;it!=primers.end();it++){
1186                                         if(it->first.length() > maxLength){
1187                                                 maxLength = it->first.length();
1188                                         }
1189                                 }
1190                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1191
1192                         }else{ alignment = NULL; } 
1193                         
1194                         //can you find the barcode
1195                         int minDiff = 1e6;
1196                         int minCount = 1;
1197                         int minGroup = -1;
1198                         int minPos = 0;
1199                         
1200                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1201                                 string oligo = it->first;
1202 //                              int length = oligo.length();
1203                                 
1204                                 if(rawSequence.length() < maxLength){   
1205                                         success = pdiffs + 100;
1206                                         break;
1207                                 }
1208                                 
1209                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1210                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1211                                 oligo = alignment->getSeqAAln();
1212                                 string temp = alignment->getSeqBAln();
1213                 
1214                                 int alnLength = oligo.length();
1215                                 
1216                                 for(int i=oligo.length()-1;i>=0;i--){
1217                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1218                                 }
1219                                 oligo = oligo.substr(0,alnLength);
1220                                 temp = temp.substr(0,alnLength);
1221                                 
1222                                 int numDiff = countDiffs(oligo, temp);
1223                                 
1224 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1225                                 
1226                                 if(numDiff < minDiff){
1227                                         minDiff = numDiff;
1228                                         minCount = 1;
1229                                         minGroup = it->second;
1230                                         minPos = 0;
1231                                         for(int i=0;i<alnLength;i++){
1232                                                 if(temp[i] != '-'){
1233                                                         minPos++;
1234                                                 }
1235                                         }
1236                                 }
1237                                 else if(numDiff == minDiff){
1238                                         minCount++;
1239                                 }
1240
1241                         }
1242
1243                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1244                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1245                         else{                                                                                                   //use the best match
1246                                 group = minGroup;
1247                                 seq.setUnaligned(rawSequence.substr(minPos));
1248                                 if(qual.getName() != ""){
1249                                         qual.trimQScores(minPos, -1);
1250                                 }
1251                                 success = minDiff;
1252                         }
1253                         
1254                         if (alignment != NULL) {  delete alignment;  }
1255                         
1256                 }
1257                 
1258                 return success;
1259
1260         }
1261         catch(exception& e) {
1262                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1263                 exit(1);
1264         }
1265 }
1266
1267 //***************************************************************************************************************
1268
1269 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1270         try {
1271                 string rawSequence = seq.getUnaligned();
1272                 bool success = 0;       //guilty until proven innocent
1273                 
1274                 for(int i=0;i<numRPrimers;i++){
1275                         string oligo = revPrimer[i];
1276                         
1277                         if(rawSequence.length() < oligo.length()){
1278                                 success = 0;
1279                                 break;
1280                         }
1281                         
1282                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1283                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1284                                 if(qual.getName() != ""){
1285                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1286                                 }
1287                                 success = 1;
1288                                 break;
1289                         }
1290                 }       
1291                 return success;
1292                 
1293         }
1294         catch(exception& e) {
1295                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1296                 exit(1);
1297         }
1298 }
1299
1300 //***************************************************************************************************************
1301
1302 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1303         try {
1304                 bool success = 1;
1305                 if(qscores.getName() != ""){
1306                         qscores.trimQScores(-1, keepFirst);
1307                 }
1308                 sequence.trim(keepFirst);
1309                 return success;
1310         }
1311         catch(exception& e) {
1312                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1313                 exit(1);
1314         }
1315         
1316 }       
1317
1318 //***************************************************************************************************************
1319
1320 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1321         try {
1322                 bool success = 0;
1323                 
1324                 int length = sequence.getNumBases() - removeLast;
1325                 
1326                 if(length > 0){
1327                         if(qscores.getName() != ""){
1328                                 qscores.trimQScores(-1, length);
1329                         }
1330                         sequence.trim(length);
1331                         success = 1;
1332                 }
1333                 else{
1334                         success = 0;
1335                 }
1336
1337                 return success;
1338         }
1339         catch(exception& e) {
1340                 m->errorOut(e, "removeLastTrim", "countDiffs");
1341                 exit(1);
1342         }
1343         
1344 }       
1345
1346 //***************************************************************************************************************
1347
1348 bool TrimSeqsCommand::cullLength(Sequence& seq){
1349         try {
1350         
1351                 int length = seq.getNumBases();
1352                 bool success = 0;       //guilty until proven innocent
1353                 
1354                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1355                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1356                 else                                                                                            {       success = 0;    }
1357                 
1358                 return success;
1359         
1360         }
1361         catch(exception& e) {
1362                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1363                 exit(1);
1364         }
1365         
1366 }
1367
1368 //***************************************************************************************************************
1369
1370 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1371         try {
1372                 int longHomoP = seq.getLongHomoPolymer();
1373                 bool success = 0;       //guilty until proven innocent
1374                 
1375                 if(longHomoP <= maxHomoP){      success = 1;    }
1376                 else                                    {       success = 0;    }
1377                 
1378                 return success;
1379         }
1380         catch(exception& e) {
1381                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1382                 exit(1);
1383         }
1384         
1385 }
1386
1387 //***************************************************************************************************************
1388
1389 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1390         try {
1391                 int numNs = seq.getAmbigBases();
1392                 bool success = 0;       //guilty until proven innocent
1393                 
1394                 if(numNs <= maxAmbig)   {       success = 1;    }
1395                 else                                    {       success = 0;    }
1396                 
1397                 return success;
1398         }
1399         catch(exception& e) {
1400                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1401                 exit(1);
1402         }
1403         
1404 }
1405
1406 //***************************************************************************************************************
1407
1408 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1409         try {
1410                 bool success = 1;
1411                 int length = oligo.length();
1412                 
1413                 for(int i=0;i<length;i++){
1414                         
1415                         if(oligo[i] != seq[i]){
1416                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1417                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1418                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1419                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1420                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1421                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1422                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1423                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1424                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1425                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1426                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1427                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1428                                 
1429                                 if(success == 0)        {       break;   }
1430                         }
1431                         else{
1432                                 success = 1;
1433                         }
1434                 }
1435                 
1436                 return success;
1437         }
1438         catch(exception& e) {
1439                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1440                 exit(1);
1441         }
1442
1443 }
1444
1445 //***************************************************************************************************************
1446
1447 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1448         try {
1449
1450                 int length = oligo.length();
1451                 int countDiffs = 0;
1452                 
1453                 for(int i=0;i<length;i++){
1454                                                                 
1455                         if(oligo[i] != seq[i]){
1456                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1457                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1458                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1459                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1460                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1461                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1462                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1463                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1464                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1465                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1466                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1467                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1468                         }
1469                         
1470                 }
1471                 
1472                 return countDiffs;
1473         }
1474         catch(exception& e) {
1475                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1476                 exit(1);
1477         }
1478
1479 }
1480
1481 //***************************************************************************************************************