]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added [ERROR] flag if command aborts
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14
15 vector<string> TrimSeqsCommand::getValidParameters(){   
16         try {
17                 string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
18                                                                         "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
19                                                                         "keepfirst", "removelast",
20                                                                         "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
21                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
22                 return myArray;
23         }
24         catch(exception& e) {
25                 m->errorOut(e, "TrimSeqsCommand", "getValidParameters");
26                 exit(1);
27         }
28 }
29
30 //**********************************************************************************************************************
31
32 TrimSeqsCommand::TrimSeqsCommand(){     
33         try {
34                 abort = true; calledHelp = true; 
35                 vector<string> tempOutNames;
36                 outputTypes["fasta"] = tempOutNames;
37                 outputTypes["qual"] = tempOutNames;
38                 outputTypes["group"] = tempOutNames;
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
42                 exit(1);
43         }
44 }
45
46 //**********************************************************************************************************************
47
48 vector<string> TrimSeqsCommand::getRequiredParameters(){        
49         try {
50                 string Array[] =  {"fasta"};
51                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
52                 return myArray;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "TrimSeqsCommand", "getRequiredParameters");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61
62 vector<string> TrimSeqsCommand::getRequiredFiles(){     
63         try {
64                 vector<string> myArray;
65                 return myArray;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "TrimSeqsCommand", "getRequiredFiles");
69                 exit(1);
70         }
71 }
72
73 //***************************************************************************************************************
74
75 TrimSeqsCommand::TrimSeqsCommand(string option)  {
76         try {
77                 
78                 abort = false; calledHelp = false;   
79                 comboStarts = 0;
80                 
81                 //allow user to run help
82                 if(option == "help") { help(); abort = true; calledHelp = true; }
83                 
84                 else {
85                         //valid paramters for this command
86                         string AlignArray[] =  {        "fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
87                                                                 "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage",
88                                                                 "keepfirst", "removelast",
89                                                                 "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
90                         
91                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
92                         
93                         OptionParser parser(option);
94                         map<string,string> parameters = parser.getParameters();
95                         
96                         ValidParameters validParameter;
97                         map<string,string>::iterator it;
98                         
99                         //check to make sure all parameters are valid for command
100                         for (it = parameters.begin(); it != parameters.end(); it++) { 
101                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
102                         }
103                         
104                         //initialize outputTypes
105                         vector<string> tempOutNames;
106                         outputTypes["fasta"] = tempOutNames;
107                         outputTypes["qual"] = tempOutNames;
108                         outputTypes["group"] = tempOutNames;
109                         
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("fasta");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
121                                 }
122                                 
123                                 it = parameters.find("oligos");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
129                                 }
130                                 
131                                 it = parameters.find("qfile");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
137                                 }
138                                 
139                                 it = parameters.find("group");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
145                                 }
146                         }
147
148                         
149                         //check for required parameters
150                         fastaFile = validParameter.validFile(parameters, "fasta", true);
151                         if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
152                         else if (fastaFile == "not open") { abort = true; }     
153                         
154                         //if the user changes the output directory command factory will send this info to us in the output parameter 
155                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
156                                 outputDir = ""; 
157                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
158                         }
159                 
160                         
161                         //check for optional parameter and set defaults
162                         // ...at some point should added some additional type checking...
163                         string temp;
164                         temp = validParameter.validFile(parameters, "flip", false);
165                         if (temp == "not found"){       flip = 0;       }
166                         else if(m->isTrue(temp))        {       flip = 1;       }
167                 
168                         temp = validParameter.validFile(parameters, "oligos", true);
169                         if (temp == "not found"){       oligoFile = "";         }
170                         else if(temp == "not open"){    abort = true;   } 
171                         else                                    {       oligoFile = temp;               }
172                         
173                         temp = validParameter.validFile(parameters, "group", true);
174                         if (temp == "not found"){       groupfile = "";         }
175                         else if(temp == "not open"){    abort = true;   } 
176                         else                                    {       groupfile = temp;               }
177                         
178                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
179                         convert(temp, maxAmbig);  
180
181                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
182                         convert(temp, maxHomoP);  
183
184                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
185                         convert(temp, minLength); 
186                         
187                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
188                         convert(temp, maxLength);
189                         
190                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
191                         convert(temp, bdiffs);
192                         
193                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
194                         convert(temp, pdiffs);
195                         
196                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
197                         convert(temp, tdiffs);
198                         
199                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
200                         
201                         temp = validParameter.validFile(parameters, "qfile", true);     
202                         if (temp == "not found")        {       qFileName = "";         }
203                         else if(temp == "not open")     {       abort = true;           }
204                         else                                            {       qFileName = temp;       }
205                         
206                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
207                         convert(temp, qThreshold);
208                         
209                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
210                         qtrim = m->isTrue(temp);
211
212                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
213                         convert(temp, qRollAverage);
214
215                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
216                         convert(temp, qWindowAverage);
217
218                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
219                         convert(temp, qWindowSize);
220
221                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
222                         convert(temp, qWindowStep);
223
224                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
225                         convert(temp, qAverage);
226
227                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
228                         convert(temp, keepFirst);
229
230                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
231                         convert(temp, removeLast);
232                         
233                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
234                         allFiles = m->isTrue(temp);
235                         
236                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
237                         convert(temp, processors); 
238                         
239                         if ((oligoFile != "") && (groupfile != "")) {
240                                 m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
241                         }
242                                                                                                 
243                         
244                         if(allFiles && (oligoFile == "") && (groupfile == "")){
245                                 m->mothurOut("You selected allfiles, but didn't enter an oligos or group file.  Ignoring the allfiles request."); m->mothurOutEndLine();
246                         }
247                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
248                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
249                                 qAverage=0;
250                                 qThreshold=0;
251                         }
252                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
253                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
254                                 abort = true;
255                         }
256                 }
257
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
261                 exit(1);
262         }
263 }
264
265 //**********************************************************************************************************************
266
267 void TrimSeqsCommand::help(){
268         try {
269                 m->mothurOut("The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n");
270                 m->mothurOut("The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n");
271                 m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n");
272                 m->mothurOut("The fasta parameter is required.\n");
273                 m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
274                 m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
275                 m->mothurOut("The oligos parameter allows you to provide an oligos file.\n");
276                 m->mothurOut("The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n");
277                 m->mothurOut("The maxhomop parameter allows you to set a maximum homopolymer length. \n");
278                 m->mothurOut("The minlength parameter allows you to set and minimum sequence length. \n");
279                 m->mothurOut("The maxlength parameter allows you to set and maximum sequence length. \n");
280                 m->mothurOut("The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n");
281                 m->mothurOut("The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n");
282                 m->mothurOut("The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n");
283                 m->mothurOut("The qfile parameter allows you to provide a quality file.\n");
284                 m->mothurOut("The qthreshold parameter allows you to set a minimum quality score allowed. \n");
285                 m->mothurOut("The qaverage parameter allows you to set a minimum average quality score allowed. \n");
286                 m->mothurOut("The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n");
287                 m->mothurOut("The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n");
288                 m->mothurOut("The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n");
289                 m->mothurOut("The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n");
290                 m->mothurOut("The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n");
291                 m->mothurOut("The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n");
292                 m->mothurOut("The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n");
293                 m->mothurOut("The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n");
294                 m->mothurOut("The trim.seqs command should be in the following format: \n");
295                 m->mothurOut("trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n");
296                 m->mothurOut("maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n");    
297                 m->mothurOut("Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n");
298                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
299                 m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n\n");
300
301         }
302         catch(exception& e) {
303                 m->errorOut(e, "TrimSeqsCommand", "help");
304                 exit(1);
305         }
306 }
307
308
309 //***************************************************************************************************************
310
311 TrimSeqsCommand::~TrimSeqsCommand(){    /*      do nothing      */      }
312
313 //***************************************************************************************************************
314
315 int TrimSeqsCommand::execute(){
316         try{
317         
318                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
319                 
320                 numFPrimers = 0;  //this needs to be initialized
321                 numRPrimers = 0;
322                 vector<string> fastaFileNames;
323                 vector<string> qualFileNames;
324                 
325                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
326                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
327                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
328                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
329                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
330                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
331                 if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
332                 string groupFile = "";
333                 if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
334                 else{
335                         groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
336                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
337                         groupMap = new GroupMap(groupfile);
338                         groupMap->readMap();
339                         
340                         if(allFiles){
341                                 for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
342                                         groupToIndex[groupMap->namesOfGroups[i]] = i;
343                                         groupVector.push_back(groupMap->namesOfGroups[i]);
344                                         fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) +  groupMap->namesOfGroups[i] + ".fasta"));
345                                         
346                                         //we append later, so we want to clear file
347                                         ofstream outRemove;
348                                         m->openOutputFile(fastaFileNames[i], outRemove);
349                                         outRemove.close();
350                                         if(qFileName != ""){
351                                                 qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) +  groupMap->namesOfGroups[i] + ".qual"));
352                                                 ofstream outRemove2;
353                                                 m->openOutputFile(qualFileNames[i], outRemove2);
354                                                 outRemove2.close();
355                                         }
356                                 }
357                         }
358                         comboStarts = fastaFileNames.size()-1;
359                 }
360                 
361                 if(oligoFile != ""){
362                         outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
363                         getOligos(fastaFileNames, qualFileNames);
364                 }
365
366                 vector<unsigned long int> fastaFilePos;
367                 vector<unsigned long int> qFilePos;
368                 
369                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
370                 
371                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
372                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
373                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
374                 }       
375                 if(qFileName == "")     {       qLines = lines; } //files with duds
376                 
377                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
378                                 if(processors == 1){
379                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
380                                 }else{
381                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames); 
382                                 }       
383                 #else
384                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
385                 #endif
386                 
387                 if (m->control_pressed) {  return 0; }                  
388                         
389                 set<string> blanks;
390                 for(int i=0;i<fastaFileNames.size();i++){
391                         if (m->isBlank(fastaFileNames[i])) {   blanks.insert(fastaFileNames[i]);        }
392                         else if (filesToRemove.count(fastaFileNames[i]) > 0) {  remove(fastaFileNames[i].c_str()); }
393                         else {
394                                 ifstream inFASTA;
395                                 string seqName;
396                                 m->openInputFile(fastaFileNames[i], inFASTA);
397                                 ofstream outGroups;
398                                 string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
399                                 
400                                 //if the fastafile is on the blanks list then the groups file should be as well
401                                 if (blanks.count(fastaFileNames[i]) != 0) { blanks.insert(outGroupFilename); }
402                                 
403                                 m->openOutputFile(outGroupFilename, outGroups);
404                                 outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
405                                 
406                                 string thisGroup = "";
407                                 if (i > comboStarts) {
408                                         map<string, int>::iterator itCombo;
409                                         for(itCombo=combos.begin();itCombo!=combos.end(); itCombo++){
410                                                 if(itCombo->second == i){       thisGroup = itCombo->first;     combos.erase(itCombo);  break;  }
411                                         }
412                                 }else{ thisGroup = groupVector[i]; }
413                                         
414                                 while(!inFASTA.eof()){
415                                         if(inFASTA.get() == '>'){
416                                                 inFASTA >> seqName;
417                                                 outGroups << seqName << '\t' << thisGroup << endl;
418                                         }
419                                         while (!inFASTA.eof())  {       char c = inFASTA.get(); if (c == 10 || c == 13){        break;  }       }
420                                 }
421                                 outGroups.close();
422                                 inFASTA.close();
423                         }
424                 }
425                 
426                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
427                 
428                 blanks.clear();
429                 if(qFileName != ""){
430                         for(int i=0;i<qualFileNames.size();i++){
431                                 if (m->isBlank(qualFileNames[i])) {  blanks.insert(qualFileNames[i]);   }
432                                 else if (filesToRemove.count(qualFileNames[i]) > 0) {  remove(qualFileNames[i].c_str()); }
433                         }
434                 }
435                 
436                 for (set<string>::iterator itBlanks = blanks.begin(); itBlanks != blanks.end(); itBlanks++) {  remove((*(itBlanks)).c_str()); }
437                 
438                 if (m->control_pressed) { 
439                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
440                         return 0;
441                 }
442
443                 m->mothurOutEndLine();
444                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
445                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
446                 m->mothurOutEndLine();
447                 
448                 return 0;       
449                         
450         }
451         catch(exception& e) {
452                 m->errorOut(e, "TrimSeqsCommand", "execute");
453                 exit(1);
454         }
455 }
456                 
457 /**************************************************************************************/
458
459 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames, linePair* line, linePair* qline) {      
460                 
461         try {
462                 
463                 ofstream outFASTA;
464                 m->openOutputFile(trimFile, outFASTA);
465                 
466                 ofstream scrapFASTA;
467                 m->openOutputFile(scrapFile, scrapFASTA);
468                 
469                 ofstream outQual;
470                 ofstream scrapQual;
471                 if(qFileName != ""){
472                         m->openOutputFile(trimQFile, outQual);
473                         m->openOutputFile(scrapQFile, scrapQual);
474                 }
475                 
476                 ofstream outGroups;
477                 
478                 if (oligoFile != "") {          
479                         m->openOutputFile(groupFile, outGroups);   
480                 }
481                 
482                 ifstream inFASTA;
483                 m->openInputFile(filename, inFASTA);
484                 inFASTA.seekg(line->start);
485                 
486                 ifstream qFile;
487                 if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
488                 
489                 
490                 for (int i = 0; i < fastaNames.size(); i++) { //clears old file
491                         ofstream temp;
492                         m->openOutputFile(fastaNames[i], temp);
493                         temp.close();
494                 }
495                 for (int i = 0; i < qualNames.size(); i++) { //clears old file
496                         ofstream temp;
497                         m->openOutputFile(qualNames[i], temp);
498                         temp.close();
499                 }
500                 
501                         
502                 bool done = false;
503                 int count = 0;
504         
505                 while (!done) {
506                                 
507                         if (m->control_pressed) { 
508                                 inFASTA.close(); outFASTA.close(); scrapFASTA.close();
509                                 if (oligoFile != "") {   outGroups.close();   }
510
511                                 if(qFileName != ""){
512                                         qFile.close();
513                                 }
514                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
515
516                                 return 0;
517                         }
518                         
519                         int success = 1;
520                         
521
522                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
523
524                         QualityScores currQual;
525                         if(qFileName != ""){
526                                 currQual = QualityScores(qFile, currSeq.getNumBases());  m->gobble(qFile);
527                         }
528                         
529                         string origSeq = currSeq.getUnaligned();
530                         if (origSeq != "") {
531                                 int groupBar, groupPrime;
532                                 string trashCode = "";
533                                 int currentSeqsDiffs = 0;
534
535                                 if(barcodes.size() != 0){
536                                         success = stripBarcode(currSeq, currQual, groupBar);
537                                         if(success > bdiffs)            {       trashCode += 'b';       }
538                                         else{ currentSeqsDiffs += success;  }
539                                 }
540                                 
541                                 if(numFPrimers != 0){
542                                         success = stripForward(currSeq, currQual, groupPrime);
543                                         if(success > pdiffs)            {       trashCode += 'f';       }
544                                         else{ currentSeqsDiffs += success;  }
545                                 }
546                                 
547                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
548                                 
549                                 if(numRPrimers != 0){
550                                         success = stripReverse(currSeq, currQual);
551                                         if(!success)                            {       trashCode += 'r';       }
552                                 }
553
554                                 if(keepFirst != 0){
555                                         success = keepFirstTrim(currSeq, currQual);
556                                 }
557                                 
558                                 if(removeLast != 0){
559                                         success = removeLastTrim(currSeq, currQual);
560                                         if(!success)                            {       trashCode += 'l';       }
561                                 }
562
563                                 
564                                 if(qFileName != ""){
565                                         int origLength = currSeq.getNumBases();
566                                         
567                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
568                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
569                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
570                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
571                                         else                                            {       success = 1;                            }
572                                         
573                                         //you don't want to trim, if it fails above then scrap it
574                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
575                                         
576                                         if(!success)                            {       trashCode += 'q';       }
577                                 }                               
578                 
579                                 if(minLength > 0 || maxLength > 0){
580                                         success = cullLength(currSeq);
581                                         if(!success)                            {       trashCode += 'l';       }
582                                 }
583                                 if(maxHomoP > 0){
584                                         success = cullHomoP(currSeq);
585                                         if(!success)                            {       trashCode += 'h';       }
586                                 }
587                                 if(maxAmbig != -1){
588                                         success = cullAmbigs(currSeq);
589                                         if(!success)                            {       trashCode += 'n';       }
590                                 }
591                                 
592                                 if(flip){               // should go last                       
593                                         currSeq.reverseComplement();
594                                         currQual.flipQScores(); 
595                                 }
596                                 
597                                 if(trashCode.length() == 0){
598                                         currSeq.setAligned(currSeq.getUnaligned());
599                                         currSeq.printSequence(outFASTA);
600                                         currQual.printQScores(outQual);
601                                         
602                                         if(barcodes.size() != 0){
603                                                 string thisGroup = groupVector[groupBar];
604                                                 int indexToFastaFile = groupBar;
605                                                 if (primers.size() != 0){
606                                                         //does this primer have a group
607                                                         if (groupVector[groupPrime] != "") {  
608                                                                 thisGroup += "." + groupVector[groupPrime]; 
609                                                                 indexToFastaFile = combos[thisGroup];
610                                                         }
611                                                 }
612                                                 outGroups << currSeq.getName() << '\t' << thisGroup << endl;
613                                                 if(allFiles){
614                                                         ofstream outTemp;
615                                                         m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
616                                                         //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
617                                                         currSeq.printSequence(outTemp);
618                                                         outTemp.close();
619                                                         
620                                                         if(qFileName != ""){
621                                                                 //currQual.printQScores(*qualFileNames[indexToFastaFile]);
622                                                                 ofstream outTemp2;
623                                                                 m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
624                                                                 currQual.printQScores(outTemp2);
625                                                                 outTemp2.close();                                                       
626                                                         }
627                                                 }
628                                         }
629                                         
630                                         if (groupfile != "") {
631                                                 string thisGroup = groupMap->getGroup(currSeq.getName());
632                                                 
633                                                 if (thisGroup != "not found") {
634                                                         outGroups << currSeq.getName() << '\t' << thisGroup << endl;
635                                                         if (allFiles) {
636                                                                 ofstream outTemp;
637                                                                 m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
638                                                                 currSeq.printSequence(outTemp);
639                                                                 outTemp.close();
640                                                                 if(qFileName != ""){
641                                                                         ofstream outTemp2;
642                                                                         m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
643                                                                         currQual.printQScores(outTemp2);
644                                                                         outTemp2.close();                                                       
645                                                                 }
646                                                         }
647                                                 }else{
648                                                         m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
649                                                         outGroups << currSeq.getName() << '\t' << "XXX" << endl;
650                                                         if (allFiles) {  
651                                                                 m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
652                                                         }
653                                                 }
654                                         }
655                                 }
656                                 else{
657                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
658                                         currSeq.setUnaligned(origSeq);
659                                         currSeq.setAligned(origSeq);
660                                         currSeq.printSequence(scrapFASTA);
661                                         currQual.printQScores(scrapQual);
662                                 }
663                                 count++;
664                         }
665                         
666                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
667                                 unsigned long int pos = inFASTA.tellg();
668                                 if ((pos == -1) || (pos >= line->end)) { break; }
669                         #else
670                                 if (inFASTA.eof()) { break; }
671                         #endif
672                                 
673                         //report progress
674                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
675                         
676                 }
677                 //report progress
678                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
679
680                 
681                 inFASTA.close();
682                 outFASTA.close();
683                 scrapFASTA.close();
684                 if (oligoFile != "") {   outGroups.close();   }
685                 if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
686                 
687                 return count;
688         }
689         catch(exception& e) {
690                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
691                 exit(1);
692         }
693 }
694
695 /**************************************************************************************************/
696
697 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFile, string scrapFile, string trimQFile, string scrapQFile, string groupFile, vector<string> fastaNames, vector<string> qualNames) {
698         try {
699 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
700                 int process = 1;
701                 int exitCommand = 1;
702                 processIDS.clear();
703                 
704                 //loop through and create all the processes you want
705                 while (process != processors) {
706                         int pid = fork();
707                         
708                         if (pid > 0) {
709                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
710                                 process++;
711                         }else if (pid == 0){
712                                 for (int i = 0; i < fastaNames.size(); i++) {
713                                         fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
714                                         //clear old file if it exists
715                                         ofstream temp;
716                                         m->openOutputFile(fastaNames[i], temp);
717                                         temp.close();
718                                         if(qFileName != ""){
719                                                 qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
720                                                 //clear old file if it exists
721                                                 ofstream temp2;
722                                                 m->openOutputFile(qualNames[i], temp2);
723                                                 temp2.close();
724                                         }
725                                 }
726                                 
727                                 driverCreateTrim(filename, qFileName, (trimFile + toString(getpid()) + ".temp"), (scrapFile + toString(getpid()) + ".temp"), (trimQFile + toString(getpid()) + ".temp"), (scrapQFile + toString(getpid()) + ".temp"), (groupFile + toString(getpid()) + ".temp"), fastaNames, qualNames, lines[process], qLines[process]);
728                                 exit(0);
729                         }else { 
730                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
731                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
732                                 exit(0);
733                         }
734                 }
735                 
736                 //parent do my part
737                 for (int i = 0; i < fastaNames.size(); i++) {
738                         //clear old file if it exists
739                         ofstream temp;
740                         m->openOutputFile(fastaNames[i], temp);
741                         temp.close();
742                         if(qFileName != ""){
743                                 //clear old file if it exists
744                                 ofstream temp2;
745                                 m->openOutputFile(qualNames[i], temp2);
746                                 temp2.close();
747                         }
748                 }
749                 
750                 driverCreateTrim(filename, qFileName, trimFile, scrapFile, trimQFile, scrapQFile, groupFile, fastaNames, qualNames, lines[0], qLines[0]);
751                 
752                 
753                 //force parent to wait until all the processes are done
754                 for (int i=0;i<processIDS.size();i++) { 
755                         int temp = processIDS[i];
756                         wait(&temp);
757                 }
758                 
759                 //append files
760                 for(int i=0;i<processIDS.size();i++){
761                         
762                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
763                         
764                         m->appendFiles((trimFile + toString(processIDS[i]) + ".temp"), trimFile);
765                         remove((trimFile + toString(processIDS[i]) + ".temp").c_str());
766                         m->appendFiles((scrapFile + toString(processIDS[i]) + ".temp"), scrapFile);
767                         remove((scrapFile + toString(processIDS[i]) + ".temp").c_str());
768                         
769                         m->mothurOut("Done with fasta files"); m->mothurOutEndLine();
770                         
771                         if(qFileName != ""){
772                                 m->appendFiles((trimQFile + toString(processIDS[i]) + ".temp"), trimQFile);
773                                 remove((trimQFile + toString(processIDS[i]) + ".temp").c_str());
774                                 m->appendFiles((scrapQFile + toString(processIDS[i]) + ".temp"), scrapQFile);
775                                 remove((scrapQFile + toString(processIDS[i]) + ".temp").c_str());
776                         
777                                 m->mothurOut("Done with quality files"); m->mothurOutEndLine();
778                         }
779                         
780                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
781                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
782                         
783                         m->mothurOut("Done with group file"); m->mothurOutEndLine();
784                         
785                         for (int j = 0; j < fastaNames.size(); j++) {
786                                 m->appendFiles((fastaNames[j] + toString(processIDS[i]) + ".temp"), fastaNames[j]);
787                                 remove((fastaNames[j] + toString(processIDS[i]) + ".temp").c_str());
788                         }
789                         
790                         if(qFileName != ""){
791                                 for (int j = 0; j < qualNames.size(); j++) {
792                                         m->appendFiles((qualNames[j] + toString(processIDS[i]) + ".temp"), qualNames[j]);
793                                         remove((qualNames[j] + toString(processIDS[i]) + ".temp").c_str());
794                                 }
795                         }
796                         
797                         if (allFiles) { m->mothurOut("Done with allfiles"); m->mothurOutEndLine(); }
798                 }
799         
800                 return exitCommand;
801 #endif          
802         }
803         catch(exception& e) {
804                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
805                 exit(1);
806         }
807 }
808
809 /**************************************************************************************************/
810
811 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
812         try {
813                 
814                 //set file positions for fasta file
815                 fastaFilePos = m->divideFile(filename, processors);
816                 
817                 if (qfilename == "") { return processors; }
818                 
819                 //get name of first sequence in each chunk
820                 map<string, int> firstSeqNames;
821                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
822                         ifstream in;
823                         m->openInputFile(filename, in);
824                         in.seekg(fastaFilePos[i]);
825                 
826                         Sequence temp(in); 
827                         firstSeqNames[temp.getName()] = i;
828                 
829                         in.close();
830                 }
831                                 
832                 //seach for filePos of each first name in the qfile and save in qfileFilePos
833                 ifstream inQual;
834                 m->openInputFile(qfilename, inQual);
835                 
836                 string input;
837                 while(!inQual.eof()){   
838                         input = m->getline(inQual);
839
840                         if (input.length() != 0) {
841                                 if(input[0] == '>'){ //this is a sequence name line
842                                         istringstream nameStream(input);
843                                         
844                                         string sname = "";  nameStream >> sname;
845                                         sname = sname.substr(1);
846                                         
847                                         map<string, int>::iterator it = firstSeqNames.find(sname);
848                                         
849                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
850                                                 unsigned long int pos = inQual.tellg(); 
851                                                 qfileFilePos.push_back(pos - input.length() - 1);       
852                                                 firstSeqNames.erase(it);
853                                         }
854                                 }
855                         }
856                         
857                         if (firstSeqNames.size() == 0) { break; }
858                 }
859                 inQual.close();
860                 
861                 
862                 if (firstSeqNames.size() != 0) { 
863                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
864                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
865                         }
866                         qFileName = "";
867                         return processors;
868                 }
869
870                 //get last file position of qfile
871                 FILE * pFile;
872                 unsigned long int size;
873                 
874                 //get num bytes in file
875                 pFile = fopen (qfilename.c_str(),"rb");
876                 if (pFile==NULL) perror ("Error opening file");
877                 else{
878                         fseek (pFile, 0, SEEK_END);
879                         size=ftell (pFile);
880                         fclose (pFile);
881                 }
882                 
883                 qfileFilePos.push_back(size);
884                 
885                 return processors;
886         }
887         catch(exception& e) {
888                 m->errorOut(e, "TrimSeqsCommand", "setLines");
889                 exit(1);
890         }
891 }
892
893 //***************************************************************************************************************
894
895 void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& outQualVec){
896         try {
897                 ifstream inOligos;
898                 m->openInputFile(oligoFile, inOligos);
899                 
900                 ofstream test;
901                 
902                 string type, oligo, group;
903                 int index=0;
904                 //int indexPrimer = 0;
905                 
906                 while(!inOligos.eof()){
907                         inOligos >> type; m->gobble(inOligos);
908                                         
909                         if(type[0] == '#'){
910                                 while (!inOligos.eof()) {       char c = inOligos.get(); if (c == 10 || c == 13){       break;  }       } // get rest of line if there's any crap there
911                         }
912                         else{
913                                 //make type case insensitive
914                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
915                                 
916                                 inOligos >> oligo;
917                                 
918                                 for(int i=0;i<oligo.length();i++){
919                                         oligo[i] = toupper(oligo[i]);
920                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
921                                 }
922                                 
923                                 if(type == "FORWARD"){
924                                         group = "";
925                                         
926                                         // get rest of line in case there is a primer name
927                                         while (!inOligos.eof()) {       
928                                                 char c = inOligos.get(); 
929                                                 if (c == 10 || c == 13){        break;  }
930                                                 else if (c == 32 || c == 9){;} //space or tab
931                                                 else {  group += c;  }
932                                         } 
933                                         
934                                         //check for repeat barcodes
935                                         map<string, int>::iterator itPrime = primers.find(oligo);
936                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
937                                         
938                                                 primers[oligo]=index; index++;
939                                                 groupVector.push_back(group);
940                                         
941                                                 if(allFiles){
942                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
943                                                         if(qFileName != ""){
944                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
945                                                         }
946                                                         if (group == "") { //if there is not a group for this primer, then this file will not get written to, but we add it to keep the indexes correct
947                                                                 filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
948                                                                 if(qFileName != ""){
949                                                                         filesToRemove.insert((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
950                                                                 }
951                                                         }else {
952                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
953                                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
954                                                                 if(qFileName != ""){
955                                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
956                                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
957                                                                 }                                                       
958                                                         }
959                                                 }
960                                         
961                                 }
962                                 else if(type == "REVERSE"){
963                                         Sequence oligoRC("reverse", oligo);
964                                         oligoRC.reverseComplement();
965                                         revPrimer.push_back(oligoRC.getUnaligned());
966                                 }
967                                 else if(type == "BARCODE"){
968                                         inOligos >> group;
969                                         
970                                         //check for repeat barcodes
971                                         map<string, int>::iterator itBar = barcodes.find(oligo);
972                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
973                                         
974                                                 barcodes[oligo]=index; index++;
975                                                 groupVector.push_back(group);
976                                                 
977                                                 if(allFiles){
978                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
979                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
980                                                         outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
981                                                         if(qFileName != ""){
982                                                                 outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
983                                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
984                                                                 outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
985                                                         }                                                       
986                                                 }
987                                         
988                                 }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
989                         }
990                         m->gobble(inOligos);
991                 }
992                 
993                 inOligos.close();
994                 
995                 //add in potential combos
996                 if(allFiles){
997                         comboStarts = outFASTAVec.size()-1;
998                         for (map<string, int>::iterator itBar = barcodes.begin(); itBar != barcodes.end(); itBar++) {
999                                 for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
1000                                         if (groupVector[itPrime->second] != "") { //there is a group for this primer
1001                                                 outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1002                                                 outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1003                                                 outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
1004                                                 combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
1005                                                 
1006                                                 if(qFileName != ""){
1007                                                         outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1008                                                         outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1009                                                         outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
1010                                                 }
1011                                         }
1012                                 }
1013                         }
1014                 }
1015                 
1016                 numFPrimers = primers.size();
1017                 numRPrimers = revPrimer.size();
1018                 
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1022                 exit(1);
1023         }
1024 }
1025 //***************************************************************************************************************
1026
1027 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1028         try {
1029                 
1030                 string rawSequence = seq.getUnaligned();
1031                 int success = bdiffs + 1;       //guilty until proven innocent
1032                 
1033                 //can you find the barcode
1034                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1035                         string oligo = it->first;
1036                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1037                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1038                                 break;  
1039                         }
1040                         
1041                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1042                                 group = it->second;
1043                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1044                                 
1045                                 if(qual.getName() != ""){
1046                                         qual.trimQScores(oligo.length(), -1);
1047                                 }
1048                                 
1049                                 success = 0;
1050                                 break;
1051                         }
1052                 }
1053                 
1054                 //if you found the barcode or if you don't want to allow for diffs
1055 //              cout << success;
1056                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1057                 
1058                 else { //try aligning and see if you can find it
1059 //                      cout << endl;
1060
1061                         int maxLength = 0;
1062
1063                         Alignment* alignment;
1064                         if (barcodes.size() > 0) {
1065                                 map<string,int>::iterator it=barcodes.begin();
1066
1067                                 for(it;it!=barcodes.end();it++){
1068                                         if(it->first.length() > maxLength){
1069                                                 maxLength = it->first.length();
1070                                         }
1071                                 }
1072                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1073
1074                         }else{ alignment = NULL; } 
1075                         
1076                         //can you find the barcode
1077                         int minDiff = 1e6;
1078                         int minCount = 1;
1079                         int minGroup = -1;
1080                         int minPos = 0;
1081                         
1082                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1083                                 string oligo = it->first;
1084 //                              int length = oligo.length();
1085                                 
1086                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1087                                         success = bdiffs + 10;
1088                                         break;
1089                                 }
1090                                 
1091                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1092                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1093                                 oligo = alignment->getSeqAAln();
1094                                 string temp = alignment->getSeqBAln();
1095                 
1096                                 int alnLength = oligo.length();
1097                                 
1098                                 for(int i=oligo.length()-1;i>=0;i--){
1099                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1100                                 }
1101                                 oligo = oligo.substr(0,alnLength);
1102                                 temp = temp.substr(0,alnLength);
1103                                 
1104                                 int numDiff = countDiffs(oligo, temp);
1105                                 
1106 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1107                                 
1108                                 if(numDiff < minDiff){
1109                                         minDiff = numDiff;
1110                                         minCount = 1;
1111                                         minGroup = it->second;
1112                                         minPos = 0;
1113                                         for(int i=0;i<alnLength;i++){
1114                                                 if(temp[i] != '-'){
1115                                                         minPos++;
1116                                                 }
1117                                         }
1118                                 }
1119                                 else if(numDiff == minDiff){
1120                                         minCount++;
1121                                 }
1122
1123                         }
1124
1125                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1126                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1127                         else{                                                                                                   //use the best match
1128                                 group = minGroup;
1129                                 seq.setUnaligned(rawSequence.substr(minPos));
1130                                 
1131                                 if(qual.getName() != ""){
1132                                         qual.trimQScores(minPos, -1);
1133                                 }
1134                                 success = minDiff;
1135                         }
1136                         
1137                         if (alignment != NULL) {  delete alignment;  }
1138                         
1139                 }
1140 //              cout << success << endl;
1141                 
1142                 return success;
1143                 
1144         }
1145         catch(exception& e) {
1146                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1147                 exit(1);
1148         }
1149
1150 }
1151
1152 //***************************************************************************************************************
1153
1154 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1155         try {
1156                 string rawSequence = seq.getUnaligned();
1157                 int success = pdiffs + 1;       //guilty until proven innocent
1158                 
1159                 //can you find the primer
1160                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1161                         string oligo = it->first;
1162                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1163                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1164                                 break;  
1165                         }
1166                         
1167                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1168                                 group = it->second;
1169                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1170                                 if(qual.getName() != ""){
1171                                         qual.trimQScores(oligo.length(), -1);
1172                                 }
1173                                 success = 0;
1174                                 break;
1175                         }
1176                 }
1177
1178                 //if you found the barcode or if you don't want to allow for diffs
1179 //              cout << success;
1180                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1181                 
1182                 else { //try aligning and see if you can find it
1183 //                      cout << endl;
1184
1185                         int maxLength = 0;
1186
1187                         Alignment* alignment;
1188                         if (primers.size() > 0) {
1189                                 map<string,int>::iterator it=primers.begin();
1190
1191                                 for(it;it!=primers.end();it++){
1192                                         if(it->first.length() > maxLength){
1193                                                 maxLength = it->first.length();
1194                                         }
1195                                 }
1196                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1197
1198                         }else{ alignment = NULL; } 
1199                         
1200                         //can you find the barcode
1201                         int minDiff = 1e6;
1202                         int minCount = 1;
1203                         int minGroup = -1;
1204                         int minPos = 0;
1205                         
1206                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1207                                 string oligo = it->first;
1208 //                              int length = oligo.length();
1209                                 
1210                                 if(rawSequence.length() < maxLength){   
1211                                         success = pdiffs + 100;
1212                                         break;
1213                                 }
1214                                 
1215                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1216                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1217                                 oligo = alignment->getSeqAAln();
1218                                 string temp = alignment->getSeqBAln();
1219                 
1220                                 int alnLength = oligo.length();
1221                                 
1222                                 for(int i=oligo.length()-1;i>=0;i--){
1223                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1224                                 }
1225                                 oligo = oligo.substr(0,alnLength);
1226                                 temp = temp.substr(0,alnLength);
1227                                 
1228                                 int numDiff = countDiffs(oligo, temp);
1229                                 
1230 //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
1231                                 
1232                                 if(numDiff < minDiff){
1233                                         minDiff = numDiff;
1234                                         minCount = 1;
1235                                         minGroup = it->second;
1236                                         minPos = 0;
1237                                         for(int i=0;i<alnLength;i++){
1238                                                 if(temp[i] != '-'){
1239                                                         minPos++;
1240                                                 }
1241                                         }
1242                                 }
1243                                 else if(numDiff == minDiff){
1244                                         minCount++;
1245                                 }
1246
1247                         }
1248
1249                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1250                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1251                         else{                                                                                                   //use the best match
1252                                 group = minGroup;
1253                                 seq.setUnaligned(rawSequence.substr(minPos));
1254                                 if(qual.getName() != ""){
1255                                         qual.trimQScores(minPos, -1);
1256                                 }
1257                                 success = minDiff;
1258                         }
1259                         
1260                         if (alignment != NULL) {  delete alignment;  }
1261                         
1262                 }
1263                 
1264                 return success;
1265
1266         }
1267         catch(exception& e) {
1268                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1269                 exit(1);
1270         }
1271 }
1272
1273 //***************************************************************************************************************
1274
1275 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1276         try {
1277                 string rawSequence = seq.getUnaligned();
1278                 bool success = 0;       //guilty until proven innocent
1279                 
1280                 for(int i=0;i<numRPrimers;i++){
1281                         string oligo = revPrimer[i];
1282                         
1283                         if(rawSequence.length() < oligo.length()){
1284                                 success = 0;
1285                                 break;
1286                         }
1287                         
1288                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1289                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1290                                 if(qual.getName() != ""){
1291                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1292                                 }
1293                                 success = 1;
1294                                 break;
1295                         }
1296                 }       
1297                 return success;
1298                 
1299         }
1300         catch(exception& e) {
1301                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1302                 exit(1);
1303         }
1304 }
1305
1306 //***************************************************************************************************************
1307
1308 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1309         try {
1310                 bool success = 1;
1311                 if(qscores.getName() != ""){
1312                         qscores.trimQScores(-1, keepFirst);
1313                 }
1314                 sequence.trim(keepFirst);
1315                 return success;
1316         }
1317         catch(exception& e) {
1318                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1319                 exit(1);
1320         }
1321         
1322 }       
1323
1324 //***************************************************************************************************************
1325
1326 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1327         try {
1328                 bool success = 0;
1329                 
1330                 int length = sequence.getNumBases() - removeLast;
1331                 
1332                 if(length > 0){
1333                         if(qscores.getName() != ""){
1334                                 qscores.trimQScores(-1, length);
1335                         }
1336                         sequence.trim(length);
1337                         success = 1;
1338                 }
1339                 else{
1340                         success = 0;
1341                 }
1342
1343                 return success;
1344         }
1345         catch(exception& e) {
1346                 m->errorOut(e, "removeLastTrim", "countDiffs");
1347                 exit(1);
1348         }
1349         
1350 }       
1351
1352 //***************************************************************************************************************
1353
1354 bool TrimSeqsCommand::cullLength(Sequence& seq){
1355         try {
1356         
1357                 int length = seq.getNumBases();
1358                 bool success = 0;       //guilty until proven innocent
1359                 
1360                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1361                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1362                 else                                                                                            {       success = 0;    }
1363                 
1364                 return success;
1365         
1366         }
1367         catch(exception& e) {
1368                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1369                 exit(1);
1370         }
1371         
1372 }
1373
1374 //***************************************************************************************************************
1375
1376 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1377         try {
1378                 int longHomoP = seq.getLongHomoPolymer();
1379                 bool success = 0;       //guilty until proven innocent
1380                 
1381                 if(longHomoP <= maxHomoP){      success = 1;    }
1382                 else                                    {       success = 0;    }
1383                 
1384                 return success;
1385         }
1386         catch(exception& e) {
1387                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1388                 exit(1);
1389         }
1390         
1391 }
1392
1393 //***************************************************************************************************************
1394
1395 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1396         try {
1397                 int numNs = seq.getAmbigBases();
1398                 bool success = 0;       //guilty until proven innocent
1399                 
1400                 if(numNs <= maxAmbig)   {       success = 1;    }
1401                 else                                    {       success = 0;    }
1402                 
1403                 return success;
1404         }
1405         catch(exception& e) {
1406                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1407                 exit(1);
1408         }
1409         
1410 }
1411
1412 //***************************************************************************************************************
1413
1414 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1415         try {
1416                 bool success = 1;
1417                 int length = oligo.length();
1418                 
1419                 for(int i=0;i<length;i++){
1420                         
1421                         if(oligo[i] != seq[i]){
1422                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1423                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1424                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1425                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1426                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1427                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1428                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1429                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1430                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1431                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1432                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1433                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1434                                 
1435                                 if(success == 0)        {       break;   }
1436                         }
1437                         else{
1438                                 success = 1;
1439                         }
1440                 }
1441                 
1442                 return success;
1443         }
1444         catch(exception& e) {
1445                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1446                 exit(1);
1447         }
1448
1449 }
1450
1451 //***************************************************************************************************************
1452
1453 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1454         try {
1455
1456                 int length = oligo.length();
1457                 int countDiffs = 0;
1458                 
1459                 for(int i=0;i<length;i++){
1460                                                                 
1461                         if(oligo[i] != seq[i]){
1462                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1463                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1464                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1465                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1466                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1467                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1468                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1469                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1470                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1471                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1472                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1473                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1474                         }
1475                         
1476                 }
1477                 
1478                 return countDiffs;
1479         }
1480         catch(exception& e) {
1481                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1482                 exit(1);
1483         }
1484
1485 }
1486
1487 //***************************************************************************************************************