]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
optimizing classify.seqs calculating of template probabilities.
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "qfile")            {   outputFileName =  "qual";   }
111             else if (type == "fasta")            {   outputFileName =  "fasta";   }
112             else if (type == "group")            {   outputFileName =  "groups";   }
113             else if (type == "name")            {   outputFileName =  "names";   }
114             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
115         }
116         return outputFileName;
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
120                 exit(1);
121         }
122 }
123
124
125 //**********************************************************************************************************************
126
127 TrimSeqsCommand::TrimSeqsCommand(){     
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133                 outputTypes["qfile"] = tempOutNames;
134                 outputTypes["group"] = tempOutNames;
135                 outputTypes["name"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174                         
175                         //if the user changes the input directory command factory will send this info to us in the output parameter 
176                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
177                         if (inputDir == "not found"){   inputDir = "";          }
178                         else {
179                                 string path;
180                                 it = parameters.find("fasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("oligos");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
194                                 }
195                                 
196                                 it = parameters.find("qfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
202                                 }
203                                 
204                                 it = parameters.find("name");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
210                                 }
211                                 
212                         }
213
214                         
215                         //check for required parameters
216                         fastaFile = validParameter.validFile(parameters, "fasta", true);
217                         if (fastaFile == "not found") {                                 
218                                 fastaFile = m->getFastaFile(); 
219                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
220                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
221                         }else if (fastaFile == "not open") { abort = true; }    
222                         else { m->setFastaFile(fastaFile); }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
226                                 outputDir = ""; 
227                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
228                         }
229                 
230                         
231                         //check for optional parameter and set defaults
232                         // ...at some point should added some additional type checking...
233                         string temp;
234                         temp = validParameter.validFile(parameters, "flip", false);
235                         if (temp == "not found")    {   flip = 0;       }
236                         else {  flip = m->isTrue(temp);         }
237                 
238                         temp = validParameter.validFile(parameters, "oligos", true);
239                         if (temp == "not found"){       oligoFile = "";         }
240                         else if(temp == "not open"){    abort = true;   } 
241                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
242                         
243                         
244                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
245                         m->mothurConvert(temp, maxAmbig);  
246
247                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
248                         m->mothurConvert(temp, maxHomoP);  
249
250                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, minLength); 
252                         
253                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
254                         m->mothurConvert(temp, maxLength);
255                         
256                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, bdiffs);
258                         
259                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, pdiffs);
261             
262             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, ldiffs);
264             
265             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, sdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
269                         m->mothurConvert(temp, tdiffs);
270                         
271                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
272                         
273                         temp = validParameter.validFile(parameters, "qfile", true);     
274                         if (temp == "not found")        {       qFileName = "";         }
275                         else if(temp == "not open")     {       abort = true;           }
276                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
277                         
278                         temp = validParameter.validFile(parameters, "name", true);      
279                         if (temp == "not found")        {       nameFile = "";          }
280                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
281                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
282                         
283                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
284                         m->mothurConvert(temp, qThreshold);
285                         
286                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
287                         qtrim = m->isTrue(temp);
288
289                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
290                         convert(temp, qRollAverage);
291
292                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
293                         convert(temp, qWindowAverage);
294
295                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
296                         convert(temp, qWindowSize);
297
298                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
299                         convert(temp, qWindowStep);
300
301                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
302                         convert(temp, qAverage);
303
304                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
305                         convert(temp, keepFirst);
306
307                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
308                         convert(temp, removeLast);
309                         
310                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
311                         allFiles = m->isTrue(temp);
312             
313             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
314                         keepforward = m->isTrue(temp);
315                         
316                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
317                         m->setProcessors(temp);
318                         m->mothurConvert(temp, processors); 
319                         
320                         
321                         if(allFiles && (oligoFile == "")){
322                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
323                         }
324                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
325                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
326                                 qAverage=0;
327                                 qThreshold=0;
328                         }
329                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
330                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
331                                 abort = true;
332                         }
333                         
334                         if (nameFile == "") {
335                                 vector<string> files; files.push_back(fastaFile);
336                                 parser.getNameFile(files);
337                         }
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
343                 exit(1);
344         }
345 }
346 //***************************************************************************************************************
347
348 int TrimSeqsCommand::execute(){
349         try{
350         
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 numFPrimers = 0;  //this needs to be initialized
354                 numRPrimers = 0;
355         numSpacers = 0;
356         numLinkers = 0;
357                 createGroup = false;
358                 vector<vector<string> > fastaFileNames;
359                 vector<vector<string> > qualFileNames;
360                 vector<vector<string> > nameFileNames;
361                 
362                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
363                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
364                 
365                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
366                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
367                 
368                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
369                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
370                 
371                 if (qFileName != "") {
372                         outputNames.push_back(trimQualFile);
373                         outputNames.push_back(scrapQualFile);
374                         outputTypes["qfile"].push_back(trimQualFile);
375                         outputTypes["qfile"].push_back(scrapQualFile); 
376                 }
377                 
378                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
379                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
380                 
381                 if (nameFile != "") {
382                         m->readNames(nameFile, nameMap);
383                         outputNames.push_back(trimNameFile);
384                         outputNames.push_back(scrapNameFile);
385                         outputTypes["name"].push_back(trimNameFile);
386                         outputTypes["name"].push_back(scrapNameFile); 
387                 }
388                 
389                 if (m->control_pressed) { return 0; }
390                 
391                 string outputGroupFileName;
392                 if(oligoFile != ""){
393                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
394                         if (createGroup) {
395                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
396                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
397                         }
398                 }
399        
400         //fills lines and qlines
401                 setLines(fastaFile, qFileName);
402                 
403         if(processors == 1){
404             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
405         }else{
406             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
407         }       
408                 
409                 
410                 if (m->control_pressed) {  return 0; }                  
411         
412                 if(allFiles){
413                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
414                         map<string, string>::iterator it;
415                         set<string> namesToRemove;
416                         for(int i=0;i<fastaFileNames.size();i++){
417                                 for(int j=0;j<fastaFileNames[0].size();j++){
418                                         if (fastaFileNames[i][j] != "") {
419                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
420                                                         if(m->isBlank(fastaFileNames[i][j])){
421                                                                 m->mothurRemove(fastaFileNames[i][j]);
422                                                                 namesToRemove.insert(fastaFileNames[i][j]);
423                                                         
424                                                                 if(qFileName != ""){
425                                                                         m->mothurRemove(qualFileNames[i][j]);
426                                                                         namesToRemove.insert(qualFileNames[i][j]);
427                                                                 }
428                                                                 
429                                                                 if(nameFile != ""){
430                                                                         m->mothurRemove(nameFileNames[i][j]);
431                                                                         namesToRemove.insert(nameFileNames[i][j]);
432                                                                 }
433                                                         }else{  
434                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
435                                                                 if (it == uniqueFastaNames.end()) {     
436                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
437                                                                 }       
438                                                         }
439                                                 }
440                                         }
441                                 }
442                         }
443                         
444                         //remove names for outputFileNames, just cleans up the output
445                         vector<string> outputNames2;
446                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
447                         outputNames = outputNames2;
448                         
449                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
450                                 ifstream in;
451                                 m->openInputFile(it->first, in);
452                                 
453                                 ofstream out;
454                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
455                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
456                                 m->openOutputFile(thisGroupName, out);
457                                 
458                                 while (!in.eof()){
459                                         if (m->control_pressed) { break; }
460                                         
461                                         Sequence currSeq(in); m->gobble(in);
462                                         out << currSeq.getName() << '\t' << it->second << endl;
463                     
464                     if (nameFile != "") {
465                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
466                         if (itName != nameMap.end()) { 
467                             vector<string> thisSeqsNames; 
468                             m->splitAtChar(itName->second, thisSeqsNames, ',');
469                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
470                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
471                             }
472                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
473                     }
474                                 }
475                                 in.close();
476                                 out.close();
477                         }
478                 }
479                 
480                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
481
482                 //output group counts
483                 m->mothurOutEndLine();
484                 int total = 0;
485                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
486                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
487                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
488                 }
489                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
490                 
491                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
492
493                 //set fasta file as new current fastafile
494                 string current = "";
495                 itTypes = outputTypes.find("fasta");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
498                 }
499                 
500                 itTypes = outputTypes.find("name");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
503                 }
504                 
505                 itTypes = outputTypes.find("qfile");
506                 if (itTypes != outputTypes.end()) {
507                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
508                 }
509                 
510                 itTypes = outputTypes.find("group");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
513                 }
514
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
518                 m->mothurOutEndLine();
519                 
520                 return 0;       
521                         
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "TrimSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528                 
529 /**************************************************************************************/
530
531 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
532                 
533         try {
534                 
535                 ofstream trimFASTAFile;
536                 m->openOutputFile(trimFileName, trimFASTAFile);
537                 
538                 ofstream scrapFASTAFile;
539                 m->openOutputFile(scrapFileName, scrapFASTAFile);
540                 
541                 ofstream trimQualFile;
542                 ofstream scrapQualFile;
543                 if(qFileName != ""){
544                         m->openOutputFile(trimQFileName, trimQualFile);
545                         m->openOutputFile(scrapQFileName, scrapQualFile);
546                 }
547                 
548                 ofstream trimNameFile;
549                 ofstream scrapNameFile;
550                 if(nameFile != ""){
551                         m->openOutputFile(trimNFileName, trimNameFile);
552                         m->openOutputFile(scrapNFileName, scrapNameFile);
553                 }
554                 
555                 
556                 ofstream outGroupsFile;
557                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
558                 if(allFiles){
559                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
560                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
561                                         if (fastaFileNames[i][j] != "") {
562                                                 ofstream temp;
563                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
564                                                 if(qFileName != ""){
565                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
566                                                 }
567                                                 
568                                                 if(nameFile != ""){
569                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
570                                                 }
571                                         }
572                                 }
573                         }
574                 }
575                 
576                 ifstream inFASTA;
577                 m->openInputFile(filename, inFASTA);
578                 inFASTA.seekg(line.start);
579                 
580                 ifstream qFile;
581                 if(qFileName != "")     {
582                         m->openInputFile(qFileName, qFile);
583                         qFile.seekg(qline.start);  
584                 }
585                 
586                 int count = 0;
587                 bool moreSeqs = 1;
588                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
589         
590                 while (moreSeqs) {
591                                 
592                         if (m->control_pressed) { 
593                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
594                                 if (createGroup) {       outGroupsFile.close();   }
595
596                                 if(qFileName != ""){
597                                         qFile.close();
598                                 }
599                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
600
601                                 return 0;
602                         }
603                         
604                         int success = 1;
605                         string trashCode = "";
606                         int currentSeqsDiffs = 0;
607
608                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
609                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
610             
611                         QualityScores currQual;
612                         if(qFileName != ""){
613                                 currQual = QualityScores(qFile);  m->gobble(qFile);
614                 //cout << currQual.getName() << endl;
615                         }
616                         
617                         string origSeq = currSeq.getUnaligned();
618                         if (origSeq != "") {
619                                 
620                                 int barcodeIndex = 0;
621                                 int primerIndex = 0;
622                                 
623                 if(numLinkers != 0){
624                                         success = trimOligos.stripLinker(currSeq, currQual);
625                                         if(success > ldiffs)            {       trashCode += 'k';       }
626                                         else{ currentSeqsDiffs += success;  }
627
628                                 }
629                 
630                                 if(barcodes.size() != 0){
631                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
632                                         if(success > bdiffs)            {       trashCode += 'b';       }
633                                         else{ currentSeqsDiffs += success;  }
634                                 }
635                 
636                 if(rbarcodes.size() != 0){
637                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
638                                         if(success > bdiffs)            {       trashCode += 'b';       }
639                                         else{ currentSeqsDiffs += success;  }
640                                 }
641                                 
642                 if(numSpacers != 0){
643                                         success = trimOligos.stripSpacer(currSeq, currQual);
644                                         if(success > sdiffs)            {       trashCode += 's';       }
645                                         else{ currentSeqsDiffs += success;  }
646
647                                 }
648                 
649                                 if(numFPrimers != 0){
650                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
651                                         if(success > pdiffs)            {       trashCode += 'f';       }
652                                         else{ currentSeqsDiffs += success;  }
653                                 }
654                                 
655                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
656                                 
657                                 if(numRPrimers != 0){
658                                         success = trimOligos.stripReverse(currSeq, currQual);
659                                         if(!success)                            {       trashCode += 'r';       }
660                                 }
661
662                                 if(keepFirst != 0){
663                                         success = keepFirstTrim(currSeq, currQual);
664                                 }
665                                 
666                                 if(removeLast != 0){
667                                         success = removeLastTrim(currSeq, currQual);
668                                         if(!success)                            {       trashCode += 'l';       }
669                                 }
670
671                                 
672                                 if(qFileName != ""){
673                                         int origLength = currSeq.getNumBases();
674                                         
675                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
676                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
677                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
678                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
679                                         else                                            {       success = 1;                            }
680                                         
681                                         //you don't want to trim, if it fails above then scrap it
682                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
683                                         
684                                         if(!success)                            {       trashCode += 'q';       }
685                                 }                               
686                 
687                                 if(minLength > 0 || maxLength > 0){
688                                         success = cullLength(currSeq);
689                                         if(!success)                            {       trashCode += 'l';       }
690                                 }
691                                 if(maxHomoP > 0){
692                                         success = cullHomoP(currSeq);
693                                         if(!success)                            {       trashCode += 'h';       }
694                                 }
695                                 if(maxAmbig != -1){
696                                         success = cullAmbigs(currSeq);
697                                         if(!success)                            {       trashCode += 'n';       }
698                                 }
699                                 
700                                 if(flip){               // should go last                       
701                                         currSeq.reverseComplement();
702                                         if(qFileName != ""){
703                                                 currQual.flipQScores(); 
704                                         }
705                                 }
706                                 
707                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
708                 
709                                 if(trashCode.length() == 0){
710                                         currSeq.setAligned(currSeq.getUnaligned());
711                                         currSeq.printSequence(trimFASTAFile);
712                                         
713                                         if(qFileName != ""){
714                                                 currQual.printQScores(trimQualFile);
715                                         }
716                                         
717                     
718                                         if(nameFile != ""){
719                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
720                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
721                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
722                                         }
723                                         
724                                         if (createGroup) {
725                                                 if(barcodes.size() != 0){
726                                                         string thisGroup = barcodeNameVector[barcodeIndex];
727                                                         if (primers.size() != 0) { 
728                                                                 if (primerNameVector[primerIndex] != "") { 
729                                                                         if(thisGroup != "") {
730                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
731                                                                         }else {
732                                                                                 thisGroup = primerNameVector[primerIndex]; 
733                                                                         }
734                                                                 } 
735                                                         }
736                                                         
737                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
738                             
739                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
740                                                         
741                             int numRedundants = 0;
742                                                         if (nameFile != "") {
743                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
744                                                                 if (itName != nameMap.end()) { 
745                                                                         vector<string> thisSeqsNames; 
746                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
747                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
748                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
749                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
750                                                                         }
751                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
752                                                         }
753                                                         
754                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
755                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
756                                                         else { groupCounts[it->first] += (1 + numRedundants); }
757                                                                 
758                                                 }
759                                         }
760                                         
761                                         if(allFiles){
762                                                 ofstream output;
763                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
764                                                 currSeq.printSequence(output);
765                                                 output.close();
766                                                 
767                                                 if(qFileName != ""){
768                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
769                                                         currQual.printQScores(output);
770                                                         output.close();                                                 
771                                                 }
772                                                 
773                                                 if(nameFile != ""){
774                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
775                                                         if (itName != nameMap.end()) { 
776                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
777                                                                 output << itName->first << '\t' << itName->second << endl; 
778                                                                 output.close();
779                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
780                                                 }
781                                         }
782                                 }
783                                 else{
784                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
785                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
786                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
787                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
788                                         }
789                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
790                                         currSeq.setUnaligned(origSeq);
791                                         currSeq.setAligned(origSeq);
792                                         currSeq.printSequence(scrapFASTAFile);
793                                         if(qFileName != ""){
794                                                 currQual.printQScores(scrapQualFile);
795                                         }
796                                 }
797                                 count++;
798                         }
799                         
800                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
801                                 unsigned long long pos = inFASTA.tellg();
802                                 if ((pos == -1) || (pos >= line.end)) { break; }
803                         
804                         #else
805                                 if (inFASTA.eof()) { break; }
806                         #endif
807                         
808                         //report progress
809                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
810                         
811                 }
812                 //report progress
813                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
814                 
815                 
816                 inFASTA.close();
817                 trimFASTAFile.close();
818                 scrapFASTAFile.close();
819                 if (createGroup) {       outGroupsFile.close();   }
820                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
821                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
822                 
823                 return count;
824         }
825         catch(exception& e) {
826                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
827                 exit(1);
828         }
829 }
830
831 /**************************************************************************************************/
832
833 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
834         try {
835         
836         int process = 1;
837                 int exitCommand = 1;
838                 processIDS.clear();
839                 
840 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
841                                 //loop through and create all the processes you want
842                 while (process != processors) {
843                         int pid = fork();
844                         
845                         if (pid > 0) {
846                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
847                                 process++;
848                         }else if (pid == 0){
849                                 
850                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
851                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
852                                 vector<vector<string> > tempNameFileNames = nameFileNames;
853
854                                 if(allFiles){
855                                         ofstream temp;
856
857                                         for(int i=0;i<tempFASTAFileNames.size();i++){
858                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
859                                                         if (tempFASTAFileNames[i][j] != "") {
860                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
861                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
862
863                                                                 if(qFileName != ""){
864                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
865                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
866                                                                 }
867                                                                 if(nameFile != ""){
868                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
869                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
870                                                                 }
871                                                         }
872                                                 }
873                                         }
874                                 }
875                                                         
876                                 driverCreateTrim(filename,
877                                                                  qFileName,
878                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
879                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
880                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
881                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
882                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
883                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
884                                                                  (groupFile + toString(getpid()) + ".temp"),
885                                                                  tempFASTAFileNames,
886                                                                  tempPrimerQualFileNames,
887                                                                  tempNameFileNames,
888                                                                  lines[process],
889                                                                  qLines[process]);
890                 
891                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
892                                 
893                                 //pass groupCounts to parent
894                                 if(createGroup){
895                                         ofstream out;
896                                         string tempFile = filename + toString(getpid()) + ".num.temp";
897                                         m->openOutputFile(tempFile, out);
898                                         
899                                         out << groupCounts.size() << endl;
900                                         
901                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
902                                                 out << it->first << '\t' << it->second << endl;
903                                         }
904                                         out.close();
905                                 }
906                                 exit(0);
907                         }else { 
908                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
909                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
910                                 exit(0);
911                         }
912                 }
913                 
914                 //parent do my part
915                 ofstream temp;
916                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
917                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
918                 if(qFileName != ""){
919                         m->openOutputFile(trimQualFileName, temp);              temp.close();
920                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
921                 }
922                 if (nameFile != "") {
923                         m->openOutputFile(trimNameFileName, temp);              temp.close();
924                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
925                 }
926
927                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
928                 
929                 //force parent to wait until all the processes are done
930                 for (int i=0;i<processIDS.size();i++) { 
931                         int temp = processIDS[i];
932                         wait(&temp);
933                 }
934 #else
935         //////////////////////////////////////////////////////////////////////////////////////////////////////
936                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
937                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
938                 //////////////////////////////////////////////////////////////////////////////////////////////////////
939                 
940                 vector<trimData*> pDataArray; 
941                 DWORD   dwThreadIdArray[processors-1];
942                 HANDLE  hThreadArray[processors-1]; 
943                 
944                 //Create processor worker threads.
945                 for( int i=0; i<processors-1; i++){
946                         
947             string extension = "";
948                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
949             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
950             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
951             vector<vector<string> > tempNameFileNames = nameFileNames;
952             
953             if(allFiles){
954                 ofstream temp;
955                 
956                 for(int i=0;i<tempFASTAFileNames.size();i++){
957                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
958                         if (tempFASTAFileNames[i][j] != "") {
959                             tempFASTAFileNames[i][j] += extension;
960                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
961                             
962                             if(qFileName != ""){
963                                 tempPrimerQualFileNames[i][j] += extension;
964                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
965                             }
966                             if(nameFile != ""){
967                                 tempNameFileNames[i][j] += extension;
968                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
969                             }
970                         }
971                     }
972                 }
973             }
974
975             
976                         trimData* tempTrim = new trimData(filename,
977                                               qFileName, nameFile,
978                                               (trimFASTAFileName+extension),
979                                               (scrapFASTAFileName+extension),
980                                               (trimQualFileName+extension),
981                                               (scrapQualFileName+extension),
982                                               (trimNameFileName+extension),
983                                               (scrapNameFileName+extension),
984                                               (groupFile+extension),
985                                               tempFASTAFileNames,
986                                               tempPrimerQualFileNames,
987                                               tempNameFileNames,
988                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
989                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
990                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
991                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
992                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
993                         pDataArray.push_back(tempTrim);
994             
995                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
996                 }
997         
998         //parent do my part
999                 ofstream temp;
1000                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1001                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1002                 if(qFileName != ""){
1003                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1004                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1005                 }
1006                 if (nameFile != "") {
1007                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1008                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1009                 }
1010         
1011                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1012         processIDS.push_back(processors-1);
1013
1014         
1015                 //Wait until all threads have terminated.
1016                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1017                 
1018                 //Close all thread handles and free memory allocations.
1019                 for(int i=0; i < pDataArray.size(); i++){
1020                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1021                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1022                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1023                 else { groupCounts[it->first] += it->second; }
1024             }
1025             CloseHandle(hThreadArray[i]);
1026                         delete pDataArray[i];
1027                 }
1028         
1029 #endif          
1030         
1031         
1032         //append files
1033                 for(int i=0;i<processIDS.size();i++){
1034                         
1035                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1036                         
1037                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1038                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1039                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1040                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1041                         
1042                         if(qFileName != ""){
1043                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1044                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1045                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1046                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1047                         }
1048                         
1049                         if(nameFile != ""){
1050                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1051                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1052                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1053                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1054                         }
1055                         
1056                         if(createGroup){
1057                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1058                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1059                         }
1060                         
1061                         
1062                         if(allFiles){
1063                                 for(int j=0;j<fastaFileNames.size();j++){
1064                                         for(int k=0;k<fastaFileNames[j].size();k++){
1065                                                 if (fastaFileNames[j][k] != "") {
1066                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1067                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1068                                                         
1069                                                         if(qFileName != ""){
1070                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1071                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1072                                                         }
1073                                                         
1074                                                         if(nameFile != ""){
1075                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1076                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1077                                                         }
1078                                                 }
1079                                         }
1080                                 }
1081                         }
1082                         
1083             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1084                         if(createGroup){
1085                                 ifstream in;
1086                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1087                                 m->openInputFile(tempFile, in);
1088                                 int tempNum;
1089                                 string group;
1090                                 
1091                                 in >> tempNum; m->gobble(in);
1092                                 
1093                                 if (tempNum != 0) {
1094                                         while (!in.eof()) { 
1095                                                 in >> group >> tempNum; m->gobble(in);
1096                         
1097                                                 map<string, int>::iterator it = groupCounts.find(group);
1098                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1099                                                 else { groupCounts[it->first] += tempNum; }
1100                                         }
1101                                 }
1102                                 in.close(); m->mothurRemove(tempFile);
1103                         }
1104             #endif
1105                 }
1106
1107         return exitCommand;
1108         }
1109         catch(exception& e) {
1110                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1111                 exit(1);
1112         }
1113 }
1114
1115 /**************************************************************************************************/
1116
1117 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1118         try {
1119         
1120         vector<unsigned long long> fastaFilePos;
1121                 vector<unsigned long long> qfileFilePos;
1122                 
1123                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1124                 //set file positions for fasta file
1125                 fastaFilePos = m->divideFile(filename, processors);
1126                 
1127                 //get name of first sequence in each chunk
1128                 map<string, int> firstSeqNames;
1129                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1130                         ifstream in;
1131                         m->openInputFile(filename, in);
1132                         in.seekg(fastaFilePos[i]);
1133                 
1134                         Sequence temp(in); 
1135                         firstSeqNames[temp.getName()] = i;
1136                 
1137                         in.close();
1138                 }
1139                 
1140                 if(qfilename != "")     {
1141             //seach for filePos of each first name in the qfile and save in qfileFilePos
1142             ifstream inQual;
1143             m->openInputFile(qfilename, inQual);
1144             
1145             string input;
1146             while(!inQual.eof()){       
1147                 input = m->getline(inQual);
1148                 
1149                 if (input.length() != 0) {
1150                     if(input[0] == '>'){ //this is a sequence name line
1151                         istringstream nameStream(input);
1152                         
1153                         string sname = "";  nameStream >> sname;
1154                         sname = sname.substr(1);
1155                         
1156                         map<string, int>::iterator it = firstSeqNames.find(sname);
1157                         
1158                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1159                             unsigned long long pos = inQual.tellg(); 
1160                             qfileFilePos.push_back(pos - input.length() - 1);   
1161                             firstSeqNames.erase(it);
1162                         }
1163                     }
1164                 }
1165                 
1166                 if (firstSeqNames.size() == 0) { break; }
1167             }
1168             inQual.close();
1169             
1170             
1171             if (firstSeqNames.size() != 0) { 
1172                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1173                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1174                 }
1175                 qFileName = "";
1176                 return processors;
1177             }
1178             
1179             //get last file position of qfile
1180             FILE * pFile;
1181             unsigned long long size;
1182             
1183             //get num bytes in file
1184             pFile = fopen (qfilename.c_str(),"rb");
1185             if (pFile==NULL) perror ("Error opening file");
1186             else{
1187                 fseek (pFile, 0, SEEK_END);
1188                 size=ftell (pFile);
1189                 fclose (pFile);
1190             }
1191             
1192             qfileFilePos.push_back(size);
1193         }
1194         
1195         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1196             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1197                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1198                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1199                 }       
1200                 if(qfilename == "")     {       qLines = lines; } //files with duds
1201                 
1202                 return processors;
1203                 
1204                 #else
1205             
1206         if (processors == 1) { //save time
1207                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1208                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1209             lines.push_back(linePair(0, 1000));
1210             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1211         }else{
1212             int numFastaSeqs = 0;
1213             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1214             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1215         
1216             if (qfilename != "") { 
1217                 int numQualSeqs = 0;
1218                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1219                 
1220                 if (numFastaSeqs != numQualSeqs) {
1221                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1222                 }
1223             }
1224         
1225             //figure out how many sequences you have to process
1226             int numSeqsPerProcessor = numFastaSeqs / processors;
1227             for (int i = 0; i < processors; i++) {
1228                 int startIndex =  i * numSeqsPerProcessor;
1229                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1230                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1231                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1232             }
1233         }
1234             if(qfilename == "") {       qLines = lines; } //files with duds
1235                         return 1;
1236                 
1237                 #endif
1238         }
1239         catch(exception& e) {
1240                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1241                 exit(1);
1242         }
1243 }
1244
1245 //***************************************************************************************************************
1246
1247 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1248         try {
1249                 ifstream inOligos;
1250                 m->openInputFile(oligoFile, inOligos);
1251                 
1252                 ofstream test;
1253                 
1254                 string type, oligo, group;
1255
1256                 int indexPrimer = 0;
1257                 int indexBarcode = 0;
1258                 
1259                 while(!inOligos.eof()){
1260
1261                         inOligos >> type; 
1262             
1263                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1264             
1265                         if(type[0] == '#'){
1266                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1267                                 m->gobble(inOligos);
1268                         }
1269                         else{
1270                                 m->gobble(inOligos);
1271                                 //make type case insensitive
1272                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1273                                 
1274                                 inOligos >> oligo;
1275                 
1276                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1277                                 
1278                                 for(int i=0;i<oligo.length();i++){
1279                                         oligo[i] = toupper(oligo[i]);
1280                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1281                                 }
1282                                 
1283                                 if(type == "FORWARD"){
1284                                         group = "";
1285                                         
1286                                         // get rest of line in case there is a primer name
1287                                         while (!inOligos.eof()) {       
1288                                                 char c = inOligos.get(); 
1289                                                 if (c == 10 || c == 13){        break;  }
1290                                                 else if (c == 32 || c == 9){;} //space or tab
1291                                                 else {  group += c;  }
1292                                         } 
1293                                         
1294                                         //check for repeat barcodes
1295                                         map<string, int>::iterator itPrime = primers.find(oligo);
1296                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1297                                         
1298                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1299                     
1300                                         primers[oligo]=indexPrimer; indexPrimer++;              
1301                                         primerNameVector.push_back(group);
1302                                 }
1303                                 else if(type == "REVERSE"){
1304                                         //Sequence oligoRC("reverse", oligo);
1305                                         //oligoRC.reverseComplement();
1306                     string oligoRC = reverseOligo(oligo);
1307                                         revPrimer.push_back(oligoRC);
1308                                 }
1309                                 else if(type == "BARCODE"){
1310                                         inOligos >> group;
1311                     
1312                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1313                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1314                                         string temp = "";
1315                     while (!inOligos.eof())     {       
1316                                                 char c = inOligos.get(); 
1317                                                 if (c == 10 || c == 13){        break;  }
1318                                                 else if (c == 32 || c == 9){;} //space or tab
1319                                                 else {  temp += c;  }
1320                                         } 
1321                                         
1322                     //then this is illumina data with 4 columns
1323                     if (temp != "") {  
1324                         
1325                         for(int i=0;i<group.length();i++){
1326                             group[i] = toupper(group[i]);
1327                             if(group[i] == 'U') {       group[i] = 'T'; }
1328                         }
1329                         
1330                         if (m->debug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); }
1331                         
1332                         string reverseBarcode = reverseOligo(group); //reverse barcode
1333                         //check for repeat barcodes
1334                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1335                         if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine();  }
1336                         
1337                         group = temp;
1338                         rbarcodes[reverseBarcode]=indexBarcode; 
1339                     }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } }
1340                         
1341                                         //check for repeat barcodes
1342                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1343                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1344                                                 
1345                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1346                                         barcodeNameVector.push_back(group);
1347                                 }else if(type == "LINKER"){
1348                                         linker.push_back(oligo);
1349                                 }else if(type == "SPACER"){
1350                                         spacer.push_back(oligo);
1351                                 }
1352                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1353                         }
1354                         m->gobble(inOligos);
1355                 }       
1356                 inOligos.close();
1357                 
1358                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1359                 
1360                 //add in potential combos
1361                 if(barcodeNameVector.size() == 0){
1362                         barcodes[""] = 0;
1363                         barcodeNameVector.push_back("");                        
1364                 }
1365                 
1366                 if(primerNameVector.size() == 0){
1367                         primers[""] = 0;
1368                         primerNameVector.push_back("");                 
1369                 }
1370                 
1371                 fastaFileNames.resize(barcodeNameVector.size());
1372                 for(int i=0;i<fastaFileNames.size();i++){
1373                         fastaFileNames[i].assign(primerNameVector.size(), "");
1374                 }
1375                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1376                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1377                 
1378                 if(allFiles){
1379                         set<string> uniqueNames; //used to cleanup outputFileNames
1380                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1381                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1382                                         
1383                                         string primerName = primerNameVector[itPrimer->second];
1384                                         string barcodeName = barcodeNameVector[itBar->second];
1385                                         
1386                                         string comboGroupName = "";
1387                                         string fastaFileName = "";
1388                                         string qualFileName = "";
1389                                         string nameFileName = "";
1390                                         
1391                                         if(primerName == ""){
1392                                                 comboGroupName = barcodeNameVector[itBar->second];
1393                                         }
1394                                         else{
1395                                                 if(barcodeName == ""){
1396                                                         comboGroupName = primerNameVector[itPrimer->second];
1397                                                 }
1398                                                 else{
1399                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1400                                                 }
1401                                         }
1402                                         
1403                                         
1404                                         ofstream temp;
1405                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1406                                         if (uniqueNames.count(fastaFileName) == 0) {
1407                                                 outputNames.push_back(fastaFileName);
1408                                                 outputTypes["fasta"].push_back(fastaFileName);
1409                                                 uniqueNames.insert(fastaFileName);
1410                                         }
1411                                         
1412                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1413                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1414                                         
1415                                         if(qFileName != ""){
1416                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1417                                                 if (uniqueNames.count(qualFileName) == 0) {
1418                                                         outputNames.push_back(qualFileName);
1419                                                         outputTypes["qfile"].push_back(qualFileName);
1420                                                 }
1421                                                 
1422                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1423                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1424                                         }
1425                                         
1426                                         if(nameFile != ""){
1427                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1428                                                 if (uniqueNames.count(nameFileName) == 0) {
1429                                                         outputNames.push_back(nameFileName);
1430                                                         outputTypes["name"].push_back(nameFileName);
1431                                                 }
1432                                                 
1433                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1434                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1435                                         }
1436                                         
1437                                 }
1438                         }
1439                 }
1440                 numFPrimers = primers.size();
1441                 numRPrimers = revPrimer.size();
1442         numLinkers = linker.size();
1443         numSpacers = spacer.size();
1444                 
1445                 bool allBlank = true;
1446                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1447                         if (barcodeNameVector[i] != "") {
1448                                 allBlank = false;
1449                                 break;
1450                         }
1451                 }
1452                 for (int i = 0; i < primerNameVector.size(); i++) {
1453                         if (primerNameVector[i] != "") {
1454                                 allBlank = false;
1455                                 break;
1456                         }
1457                 }
1458
1459                 if (allBlank) {
1460                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1461                         allFiles = false;
1462                         return false;
1463                 }
1464                 
1465                 return true;
1466                 
1467         }
1468         catch(exception& e) {
1469                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1470                 exit(1);
1471         }
1472 }
1473 //***************************************************************************************************************
1474
1475 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1476         try {
1477                 bool success = 1;
1478                 if(qscores.getName() != ""){
1479                         qscores.trimQScores(-1, keepFirst);
1480                 }
1481                 sequence.trim(keepFirst);
1482                 return success;
1483         }
1484         catch(exception& e) {
1485                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1486                 exit(1);
1487         }
1488         
1489 }       
1490
1491 //***************************************************************************************************************
1492
1493 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1494         try {
1495                 bool success = 0;
1496                 
1497                 int length = sequence.getNumBases() - removeLast;
1498                 
1499                 if(length > 0){
1500                         if(qscores.getName() != ""){
1501                                 qscores.trimQScores(-1, length);
1502                         }
1503                         sequence.trim(length);
1504                         success = 1;
1505                 }
1506                 else{
1507                         success = 0;
1508                 }
1509
1510                 return success;
1511         }
1512         catch(exception& e) {
1513                 m->errorOut(e, "removeLastTrim", "countDiffs");
1514                 exit(1);
1515         }
1516         
1517 }       
1518
1519 //***************************************************************************************************************
1520
1521 bool TrimSeqsCommand::cullLength(Sequence& seq){
1522         try {
1523         
1524                 int length = seq.getNumBases();
1525                 bool success = 0;       //guilty until proven innocent
1526                 
1527                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1528                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1529                 else                                                                                            {       success = 0;    }
1530                 
1531                 return success;
1532         
1533         }
1534         catch(exception& e) {
1535                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1536                 exit(1);
1537         }
1538         
1539 }
1540
1541 //***************************************************************************************************************
1542
1543 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1544         try {
1545                 int longHomoP = seq.getLongHomoPolymer();
1546                 bool success = 0;       //guilty until proven innocent
1547                 
1548                 if(longHomoP <= maxHomoP){      success = 1;    }
1549                 else                                    {       success = 0;    }
1550                 
1551                 return success;
1552         }
1553         catch(exception& e) {
1554                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1555                 exit(1);
1556         }
1557         
1558 }
1559 //********************************************************************/
1560 string TrimSeqsCommand::reverseOligo(string oligo){
1561         try {
1562         string reverse = "";
1563         
1564         for(int i=oligo.length()-1;i>=0;i--){
1565             
1566             if(oligo[i] == 'A')         {       reverse += 'T'; }
1567             else if(oligo[i] == 'T'){   reverse += 'A'; }
1568             else if(oligo[i] == 'U'){   reverse += 'A'; }
1569             
1570             else if(oligo[i] == 'G'){   reverse += 'C'; }
1571             else if(oligo[i] == 'C'){   reverse += 'G'; }
1572             
1573             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1574             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1575             
1576             else if(oligo[i] == 'M'){   reverse += 'K'; }
1577             else if(oligo[i] == 'K'){   reverse += 'M'; }
1578             
1579             else if(oligo[i] == 'W'){   reverse += 'W'; }
1580             else if(oligo[i] == 'S'){   reverse += 'S'; }
1581             
1582             else if(oligo[i] == 'B'){   reverse += 'V'; }
1583             else if(oligo[i] == 'V'){   reverse += 'B'; }
1584             
1585             else if(oligo[i] == 'D'){   reverse += 'H'; }
1586             else if(oligo[i] == 'H'){   reverse += 'D'; }
1587             
1588             else                                                {       reverse += 'N'; }
1589         }
1590         
1591         
1592         return reverse;
1593     }
1594         catch(exception& e) {
1595                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1596                 exit(1);
1597         }
1598 }
1599
1600 //***************************************************************************************************************
1601
1602 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1603         try {
1604                 int numNs = seq.getAmbigBases();
1605                 bool success = 0;       //guilty until proven innocent
1606                 
1607                 if(numNs <= maxAmbig)   {       success = 1;    }
1608                 else                                    {       success = 0;    }
1609                 
1610                 return success;
1611         }
1612         catch(exception& e) {
1613                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1614                 exit(1);
1615         }
1616         
1617 }
1618 //***************************************************************************************************************