]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
changes while testing 1.27
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "qfile")            {   outputFileName =  "qual";   }
111             else if (type == "fasta")            {   outputFileName =  "fasta";   }
112             else if (type == "group")            {   outputFileName =  "groups";   }
113             else if (type == "name")            {   outputFileName =  "names";   }
114             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
115         }
116         return outputFileName;
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
120                 exit(1);
121         }
122 }
123
124
125 //**********************************************************************************************************************
126
127 TrimSeqsCommand::TrimSeqsCommand(){     
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133                 outputTypes["qfile"] = tempOutNames;
134                 outputTypes["group"] = tempOutNames;
135                 outputTypes["name"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174                         
175                         //if the user changes the input directory command factory will send this info to us in the output parameter 
176                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
177                         if (inputDir == "not found"){   inputDir = "";          }
178                         else {
179                                 string path;
180                                 it = parameters.find("fasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("oligos");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
194                                 }
195                                 
196                                 it = parameters.find("qfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
202                                 }
203                                 
204                                 it = parameters.find("name");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
210                                 }
211                                 
212                         }
213
214                         
215                         //check for required parameters
216                         fastaFile = validParameter.validFile(parameters, "fasta", true);
217                         if (fastaFile == "not found") {                                 
218                                 fastaFile = m->getFastaFile(); 
219                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
220                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
221                         }else if (fastaFile == "not open") { abort = true; }    
222                         else { m->setFastaFile(fastaFile); }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
226                                 outputDir = ""; 
227                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
228                         }
229                 
230                         
231                         //check for optional parameter and set defaults
232                         // ...at some point should added some additional type checking...
233                         string temp;
234                         temp = validParameter.validFile(parameters, "flip", false);
235                         if (temp == "not found")    {   flip = 0;       }
236                         else {  flip = m->isTrue(temp);         }
237                 
238                         temp = validParameter.validFile(parameters, "oligos", true);
239                         if (temp == "not found"){       oligoFile = "";         }
240                         else if(temp == "not open"){    abort = true;   } 
241                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
242                         
243                         
244                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
245                         m->mothurConvert(temp, maxAmbig);  
246
247                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
248                         m->mothurConvert(temp, maxHomoP);  
249
250                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, minLength); 
252                         
253                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
254                         m->mothurConvert(temp, maxLength);
255                         
256                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, bdiffs);
258                         
259                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, pdiffs);
261             
262             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, ldiffs);
264             
265             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, sdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
269                         m->mothurConvert(temp, tdiffs);
270                         
271                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
272                         
273                         temp = validParameter.validFile(parameters, "qfile", true);     
274                         if (temp == "not found")        {       qFileName = "";         }
275                         else if(temp == "not open")     {       abort = true;           }
276                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
277                         
278                         temp = validParameter.validFile(parameters, "name", true);      
279                         if (temp == "not found")        {       nameFile = "";          }
280                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
281                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
282                         
283                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
284                         m->mothurConvert(temp, qThreshold);
285                         
286                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
287                         qtrim = m->isTrue(temp);
288
289                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
290                         convert(temp, qRollAverage);
291
292                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
293                         convert(temp, qWindowAverage);
294
295                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
296                         convert(temp, qWindowSize);
297
298                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
299                         convert(temp, qWindowStep);
300
301                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
302                         convert(temp, qAverage);
303
304                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
305                         convert(temp, keepFirst);
306
307                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
308                         convert(temp, removeLast);
309                         
310                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
311                         allFiles = m->isTrue(temp);
312             
313             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
314                         keepforward = m->isTrue(temp);
315                         
316                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
317                         m->setProcessors(temp);
318                         m->mothurConvert(temp, processors); 
319                         
320                         
321                         if(allFiles && (oligoFile == "")){
322                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
323                         }
324                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
325                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
326                                 qAverage=0;
327                                 qThreshold=0;
328                         }
329                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
330                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
331                                 abort = true;
332                         }
333                         
334                         if (nameFile == "") {
335                                 vector<string> files; files.push_back(fastaFile);
336                                 parser.getNameFile(files);
337                         }
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
343                 exit(1);
344         }
345 }
346 //***************************************************************************************************************
347
348 int TrimSeqsCommand::execute(){
349         try{
350         
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 numFPrimers = 0;  //this needs to be initialized
354                 numRPrimers = 0;
355         numSpacers = 0;
356         numLinkers = 0;
357                 createGroup = false;
358                 vector<vector<string> > fastaFileNames;
359                 vector<vector<string> > qualFileNames;
360                 vector<vector<string> > nameFileNames;
361                 
362                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
363                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
364                 
365                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
366                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
367                 
368                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
369                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
370                 
371                 if (qFileName != "") {
372                         outputNames.push_back(trimQualFile);
373                         outputNames.push_back(scrapQualFile);
374                         outputTypes["qfile"].push_back(trimQualFile);
375                         outputTypes["qfile"].push_back(scrapQualFile); 
376                 }
377                 
378                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
379                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
380                 
381                 if (nameFile != "") {
382                         m->readNames(nameFile, nameMap);
383                         outputNames.push_back(trimNameFile);
384                         outputNames.push_back(scrapNameFile);
385                         outputTypes["name"].push_back(trimNameFile);
386                         outputTypes["name"].push_back(scrapNameFile); 
387                 }
388                 
389                 if (m->control_pressed) { return 0; }
390                 
391                 string outputGroupFileName;
392                 if(oligoFile != ""){
393                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
394                         if (createGroup) {
395                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
396                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
397                         }
398                 }
399        
400         //fills lines and qlines
401                 setLines(fastaFile, qFileName);
402                 
403         if(processors == 1){
404             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
405         }else{
406             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
407         }       
408                 
409                 
410                 if (m->control_pressed) {  return 0; }                  
411         
412                 if(allFiles){
413                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
414                         map<string, string>::iterator it;
415                         set<string> namesToRemove;
416                         for(int i=0;i<fastaFileNames.size();i++){
417                                 for(int j=0;j<fastaFileNames[0].size();j++){
418                                         if (fastaFileNames[i][j] != "") {
419                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
420                                                         if(m->isBlank(fastaFileNames[i][j])){
421                                                                 m->mothurRemove(fastaFileNames[i][j]);
422                                                                 namesToRemove.insert(fastaFileNames[i][j]);
423                                                         
424                                                                 if(qFileName != ""){
425                                                                         m->mothurRemove(qualFileNames[i][j]);
426                                                                         namesToRemove.insert(qualFileNames[i][j]);
427                                                                 }
428                                                                 
429                                                                 if(nameFile != ""){
430                                                                         m->mothurRemove(nameFileNames[i][j]);
431                                                                         namesToRemove.insert(nameFileNames[i][j]);
432                                                                 }
433                                                         }else{  
434                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
435                                                                 if (it == uniqueFastaNames.end()) {     
436                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
437                                                                 }       
438                                                         }
439                                                 }
440                                         }
441                                 }
442                         }
443                         
444                         //remove names for outputFileNames, just cleans up the output
445                         vector<string> outputNames2;
446                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
447                         outputNames = outputNames2;
448                         
449                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
450                                 ifstream in;
451                                 m->openInputFile(it->first, in);
452                                 
453                                 ofstream out;
454                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
455                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
456                                 m->openOutputFile(thisGroupName, out);
457                                 
458                                 while (!in.eof()){
459                                         if (m->control_pressed) { break; }
460                                         
461                                         Sequence currSeq(in); m->gobble(in);
462                                         out << currSeq.getName() << '\t' << it->second << endl;
463                     
464                     if (nameFile != "") {
465                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
466                         if (itName != nameMap.end()) { 
467                             vector<string> thisSeqsNames; 
468                             m->splitAtChar(itName->second, thisSeqsNames, ',');
469                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
470                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
471                             }
472                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
473                     }
474                                 }
475                                 in.close();
476                                 out.close();
477                         }
478                 }
479                 
480                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
481
482                 //output group counts
483                 m->mothurOutEndLine();
484                 int total = 0;
485                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
486                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
487                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
488                 }
489                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
490                 
491                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
492
493                 //set fasta file as new current fastafile
494                 string current = "";
495                 itTypes = outputTypes.find("fasta");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
498                 }
499                 
500                 itTypes = outputTypes.find("name");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
503                 }
504                 
505                 itTypes = outputTypes.find("qfile");
506                 if (itTypes != outputTypes.end()) {
507                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
508                 }
509                 
510                 itTypes = outputTypes.find("group");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
513                 }
514
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
518                 m->mothurOutEndLine();
519                 
520                 return 0;       
521                         
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "TrimSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528                 
529 /**************************************************************************************/
530
531 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
532                 
533         try {
534                 
535                 ofstream trimFASTAFile;
536                 m->openOutputFile(trimFileName, trimFASTAFile);
537                 
538                 ofstream scrapFASTAFile;
539                 m->openOutputFile(scrapFileName, scrapFASTAFile);
540                 
541                 ofstream trimQualFile;
542                 ofstream scrapQualFile;
543                 if(qFileName != ""){
544                         m->openOutputFile(trimQFileName, trimQualFile);
545                         m->openOutputFile(scrapQFileName, scrapQualFile);
546                 }
547                 
548                 ofstream trimNameFile;
549                 ofstream scrapNameFile;
550                 if(nameFile != ""){
551                         m->openOutputFile(trimNFileName, trimNameFile);
552                         m->openOutputFile(scrapNFileName, scrapNameFile);
553                 }
554                 
555                 
556                 ofstream outGroupsFile;
557                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
558                 if(allFiles){
559                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
560                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
561                                         if (fastaFileNames[i][j] != "") {
562                                                 ofstream temp;
563                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
564                                                 if(qFileName != ""){
565                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
566                                                 }
567                                                 
568                                                 if(nameFile != ""){
569                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
570                                                 }
571                                         }
572                                 }
573                         }
574                 }
575                 
576                 ifstream inFASTA;
577                 m->openInputFile(filename, inFASTA);
578                 inFASTA.seekg(line.start);
579                 
580                 ifstream qFile;
581                 if(qFileName != "")     {
582                         m->openInputFile(qFileName, qFile);
583                         qFile.seekg(qline.start);  
584                 }
585                 
586                 int count = 0;
587                 bool moreSeqs = 1;
588                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
589         
590                 while (moreSeqs) {
591                                 
592                         if (m->control_pressed) { 
593                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
594                                 if (createGroup) {       outGroupsFile.close();   }
595
596                                 if(qFileName != ""){
597                                         qFile.close();
598                                 }
599                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
600
601                                 return 0;
602                         }
603                         
604                         int success = 1;
605                         string trashCode = "";
606                         int currentSeqsDiffs = 0;
607
608                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
609                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
610             
611                         QualityScores currQual;
612                         if(qFileName != ""){
613                                 currQual = QualityScores(qFile);  m->gobble(qFile);
614                         }
615                         
616                         string origSeq = currSeq.getUnaligned();
617                         if (origSeq != "") {
618                                 
619                                 int barcodeIndex = 0;
620                                 int primerIndex = 0;
621                                 
622                 if(numLinkers != 0){
623                                         success = trimOligos.stripLinker(currSeq, currQual);
624                                         if(success > ldiffs)            {       trashCode += 'k';       }
625                                         else{ currentSeqsDiffs += success;  }
626
627                                 }
628                 
629                                 if(barcodes.size() != 0){
630                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
631                                         if(success > bdiffs)            {       trashCode += 'b';       }
632                                         else{ currentSeqsDiffs += success;  }
633                                 }
634                 
635                 if(rbarcodes.size() != 0){
636                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
637                                         if(success > bdiffs)            {       trashCode += 'b';       }
638                                         else{ currentSeqsDiffs += success;  }
639                                 }
640                                 
641                 if(numSpacers != 0){
642                                         success = trimOligos.stripSpacer(currSeq, currQual);
643                                         if(success > sdiffs)            {       trashCode += 's';       }
644                                         else{ currentSeqsDiffs += success;  }
645
646                                 }
647                 
648                                 if(numFPrimers != 0){
649                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
650                                         if(success > pdiffs)            {       trashCode += 'f';       }
651                                         else{ currentSeqsDiffs += success;  }
652                                 }
653                                 
654                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
655                                 
656                                 if(numRPrimers != 0){
657                                         success = trimOligos.stripReverse(currSeq, currQual);
658                                         if(!success)                            {       trashCode += 'r';       }
659                                 }
660
661                                 if(keepFirst != 0){
662                                         success = keepFirstTrim(currSeq, currQual);
663                                 }
664                                 
665                                 if(removeLast != 0){
666                                         success = removeLastTrim(currSeq, currQual);
667                                         if(!success)                            {       trashCode += 'l';       }
668                                 }
669
670                                 
671                                 if(qFileName != ""){
672                                         int origLength = currSeq.getNumBases();
673                                         
674                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
675                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
676                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
677                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
678                                         else                                            {       success = 1;                            }
679                                         
680                                         //you don't want to trim, if it fails above then scrap it
681                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
682                                         
683                                         if(!success)                            {       trashCode += 'q';       }
684                                 }                               
685                 
686                                 if(minLength > 0 || maxLength > 0){
687                                         success = cullLength(currSeq);
688                                         if(!success)                            {       trashCode += 'l';       }
689                                 }
690                                 if(maxHomoP > 0){
691                                         success = cullHomoP(currSeq);
692                                         if(!success)                            {       trashCode += 'h';       }
693                                 }
694                                 if(maxAmbig != -1){
695                                         success = cullAmbigs(currSeq);
696                                         if(!success)                            {       trashCode += 'n';       }
697                                 }
698                                 
699                                 if(flip){               // should go last                       
700                                         currSeq.reverseComplement();
701                                         if(qFileName != ""){
702                                                 currQual.flipQScores(); 
703                                         }
704                                 }
705                                 
706                                 if(trashCode.length() == 0){
707                                         currSeq.setAligned(currSeq.getUnaligned());
708                                         currSeq.printSequence(trimFASTAFile);
709                                         
710                                         if(qFileName != ""){
711                                                 currQual.printQScores(trimQualFile);
712                                         }
713                                         
714                     
715                                         if(nameFile != ""){
716                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
717                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
718                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
719                                         }
720                                         
721                                         if (createGroup) {
722                                                 if(barcodes.size() != 0){
723                                                         string thisGroup = barcodeNameVector[barcodeIndex];
724                                                         if (primers.size() != 0) { 
725                                                                 if (primerNameVector[primerIndex] != "") { 
726                                                                         if(thisGroup != "") {
727                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
728                                                                         }else {
729                                                                                 thisGroup = primerNameVector[primerIndex]; 
730                                                                         }
731                                                                 } 
732                                                         }
733                                                         
734                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
735                                                         
736                             int numRedundants = 0;
737                                                         if (nameFile != "") {
738                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
739                                                                 if (itName != nameMap.end()) { 
740                                                                         vector<string> thisSeqsNames; 
741                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
742                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
743                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
744                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
745                                                                         }
746                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
747                                                         }
748                                                         
749                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
750                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
751                                                         else { groupCounts[it->first] += (1 + numRedundants); }
752                                                                 
753                                                 }
754                                         }
755                                         
756                                         if(allFiles){
757                                                 ofstream output;
758                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
759                                                 currSeq.printSequence(output);
760                                                 output.close();
761                                                 
762                                                 if(qFileName != ""){
763                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
764                                                         currQual.printQScores(output);
765                                                         output.close();                                                 
766                                                 }
767                                                 
768                                                 if(nameFile != ""){
769                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
770                                                         if (itName != nameMap.end()) { 
771                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
772                                                                 output << itName->first << '\t' << itName->second << endl; 
773                                                                 output.close();
774                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
775                                                 }
776                                         }
777                                 }
778                                 else{
779                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
780                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
781                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
782                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
783                                         }
784                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
785                                         currSeq.setUnaligned(origSeq);
786                                         currSeq.setAligned(origSeq);
787                                         currSeq.printSequence(scrapFASTAFile);
788                                         if(qFileName != ""){
789                                                 currQual.printQScores(scrapQualFile);
790                                         }
791                                 }
792                                 count++;
793                         }
794                         
795                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
796                                 unsigned long long pos = inFASTA.tellg();
797                                 if ((pos == -1) || (pos >= line.end)) { break; }
798                         
799                         #else
800                                 if (inFASTA.eof()) { break; }
801                         #endif
802                         
803                         //report progress
804                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
805                         
806                 }
807                 //report progress
808                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
809                 
810                 
811                 inFASTA.close();
812                 trimFASTAFile.close();
813                 scrapFASTAFile.close();
814                 if (createGroup) {       outGroupsFile.close();   }
815                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
816                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
817                 
818                 return count;
819         }
820         catch(exception& e) {
821                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
822                 exit(1);
823         }
824 }
825
826 /**************************************************************************************************/
827
828 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
829         try {
830         
831         int process = 1;
832                 int exitCommand = 1;
833                 processIDS.clear();
834                 
835 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
836                                 //loop through and create all the processes you want
837                 while (process != processors) {
838                         int pid = fork();
839                         
840                         if (pid > 0) {
841                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
842                                 process++;
843                         }else if (pid == 0){
844                                 
845                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
846                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
847                                 vector<vector<string> > tempNameFileNames = nameFileNames;
848
849                                 if(allFiles){
850                                         ofstream temp;
851
852                                         for(int i=0;i<tempFASTAFileNames.size();i++){
853                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
854                                                         if (tempFASTAFileNames[i][j] != "") {
855                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
856                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
857
858                                                                 if(qFileName != ""){
859                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
860                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
861                                                                 }
862                                                                 if(nameFile != ""){
863                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
864                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
865                                                                 }
866                                                         }
867                                                 }
868                                         }
869                                 }
870                                                         
871                                 driverCreateTrim(filename,
872                                                                  qFileName,
873                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
874                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
875                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
876                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
877                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
878                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
879                                                                  (groupFile + toString(getpid()) + ".temp"),
880                                                                  tempFASTAFileNames,
881                                                                  tempPrimerQualFileNames,
882                                                                  tempNameFileNames,
883                                                                  lines[process],
884                                                                  qLines[process]);
885                 
886                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
887                                 
888                                 //pass groupCounts to parent
889                                 if(createGroup){
890                                         ofstream out;
891                                         string tempFile = filename + toString(getpid()) + ".num.temp";
892                                         m->openOutputFile(tempFile, out);
893                                         
894                                         out << groupCounts.size() << endl;
895                                         
896                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
897                                                 out << it->first << '\t' << it->second << endl;
898                                         }
899                                         out.close();
900                                 }
901                                 exit(0);
902                         }else { 
903                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
904                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
905                                 exit(0);
906                         }
907                 }
908                 
909                 //parent do my part
910                 ofstream temp;
911                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
912                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
913                 if(qFileName != ""){
914                         m->openOutputFile(trimQualFileName, temp);              temp.close();
915                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
916                 }
917                 if (nameFile != "") {
918                         m->openOutputFile(trimNameFileName, temp);              temp.close();
919                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
920                 }
921
922                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
923                 
924                 //force parent to wait until all the processes are done
925                 for (int i=0;i<processIDS.size();i++) { 
926                         int temp = processIDS[i];
927                         wait(&temp);
928                 }
929 #else
930         //////////////////////////////////////////////////////////////////////////////////////////////////////
931                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
932                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
933                 //////////////////////////////////////////////////////////////////////////////////////////////////////
934                 
935                 vector<trimData*> pDataArray; 
936                 DWORD   dwThreadIdArray[processors-1];
937                 HANDLE  hThreadArray[processors-1]; 
938                 
939                 //Create processor worker threads.
940                 for( int i=0; i<processors-1; i++){
941                         
942             string extension = "";
943                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
944             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
945             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
946             vector<vector<string> > tempNameFileNames = nameFileNames;
947             
948             if(allFiles){
949                 ofstream temp;
950                 
951                 for(int i=0;i<tempFASTAFileNames.size();i++){
952                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
953                         if (tempFASTAFileNames[i][j] != "") {
954                             tempFASTAFileNames[i][j] += extension;
955                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
956                             
957                             if(qFileName != ""){
958                                 tempPrimerQualFileNames[i][j] += extension;
959                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
960                             }
961                             if(nameFile != ""){
962                                 tempNameFileNames[i][j] += extension;
963                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
964                             }
965                         }
966                     }
967                 }
968             }
969
970             
971                         trimData* tempTrim = new trimData(filename,
972                                               qFileName, nameFile,
973                                               (trimFASTAFileName+extension),
974                                               (scrapFASTAFileName+extension),
975                                               (trimQualFileName+extension),
976                                               (scrapQualFileName+extension),
977                                               (trimNameFileName+extension),
978                                               (scrapNameFileName+extension),
979                                               (groupFile+extension),
980                                               tempFASTAFileNames,
981                                               tempPrimerQualFileNames,
982                                               tempNameFileNames,
983                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
984                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
985                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
986                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
987                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
988                         pDataArray.push_back(tempTrim);
989             
990                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
991                 }
992         
993         //parent do my part
994                 ofstream temp;
995                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
996                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
997                 if(qFileName != ""){
998                         m->openOutputFile(trimQualFileName, temp);              temp.close();
999                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1000                 }
1001                 if (nameFile != "") {
1002                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1003                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1004                 }
1005         
1006                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1007         processIDS.push_back(processors-1);
1008
1009         
1010                 //Wait until all threads have terminated.
1011                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1012                 
1013                 //Close all thread handles and free memory allocations.
1014                 for(int i=0; i < pDataArray.size(); i++){
1015                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1016                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1017                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1018                 else { groupCounts[it->first] += it->second; }
1019             }
1020             CloseHandle(hThreadArray[i]);
1021                         delete pDataArray[i];
1022                 }
1023         
1024 #endif          
1025         
1026         
1027         //append files
1028                 for(int i=0;i<processIDS.size();i++){
1029                         
1030                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1031                         
1032                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1033                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1034                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1035                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1036                         
1037                         if(qFileName != ""){
1038                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1039                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1040                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1041                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1042                         }
1043                         
1044                         if(nameFile != ""){
1045                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1046                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1047                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1048                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1049                         }
1050                         
1051                         if(createGroup){
1052                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1053                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1054                         }
1055                         
1056                         
1057                         if(allFiles){
1058                                 for(int j=0;j<fastaFileNames.size();j++){
1059                                         for(int k=0;k<fastaFileNames[j].size();k++){
1060                                                 if (fastaFileNames[j][k] != "") {
1061                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1062                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1063                                                         
1064                                                         if(qFileName != ""){
1065                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1066                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1067                                                         }
1068                                                         
1069                                                         if(nameFile != ""){
1070                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1071                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1072                                                         }
1073                                                 }
1074                                         }
1075                                 }
1076                         }
1077                         
1078             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1079                         if(createGroup){
1080                                 ifstream in;
1081                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1082                                 m->openInputFile(tempFile, in);
1083                                 int tempNum;
1084                                 string group;
1085                                 
1086                                 in >> tempNum; m->gobble(in);
1087                                 
1088                                 if (tempNum != 0) {
1089                                         while (!in.eof()) { 
1090                                                 in >> group >> tempNum; m->gobble(in);
1091                         
1092                                                 map<string, int>::iterator it = groupCounts.find(group);
1093                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1094                                                 else { groupCounts[it->first] += tempNum; }
1095                                         }
1096                                 }
1097                                 in.close(); m->mothurRemove(tempFile);
1098                         }
1099             #endif
1100                 }
1101
1102         return exitCommand;
1103         }
1104         catch(exception& e) {
1105                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1106                 exit(1);
1107         }
1108 }
1109
1110 /**************************************************************************************************/
1111
1112 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1113         try {
1114         
1115         vector<unsigned long long> fastaFilePos;
1116                 vector<unsigned long long> qfileFilePos;
1117                 
1118                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1119                 //set file positions for fasta file
1120                 fastaFilePos = m->divideFile(filename, processors);
1121                 
1122                 //get name of first sequence in each chunk
1123                 map<string, int> firstSeqNames;
1124                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1125                         ifstream in;
1126                         m->openInputFile(filename, in);
1127                         in.seekg(fastaFilePos[i]);
1128                 
1129                         Sequence temp(in); 
1130                         firstSeqNames[temp.getName()] = i;
1131                 
1132                         in.close();
1133                 }
1134                 
1135                 if(qfilename != "")     {
1136             //seach for filePos of each first name in the qfile and save in qfileFilePos
1137             ifstream inQual;
1138             m->openInputFile(qfilename, inQual);
1139             
1140             string input;
1141             while(!inQual.eof()){       
1142                 input = m->getline(inQual);
1143                 
1144                 if (input.length() != 0) {
1145                     if(input[0] == '>'){ //this is a sequence name line
1146                         istringstream nameStream(input);
1147                         
1148                         string sname = "";  nameStream >> sname;
1149                         sname = sname.substr(1);
1150                         
1151                         map<string, int>::iterator it = firstSeqNames.find(sname);
1152                         
1153                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1154                             unsigned long long pos = inQual.tellg(); 
1155                             qfileFilePos.push_back(pos - input.length() - 1);   
1156                             firstSeqNames.erase(it);
1157                         }
1158                     }
1159                 }
1160                 
1161                 if (firstSeqNames.size() == 0) { break; }
1162             }
1163             inQual.close();
1164             
1165             
1166             if (firstSeqNames.size() != 0) { 
1167                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1168                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1169                 }
1170                 qFileName = "";
1171                 return processors;
1172             }
1173             
1174             //get last file position of qfile
1175             FILE * pFile;
1176             unsigned long long size;
1177             
1178             //get num bytes in file
1179             pFile = fopen (qfilename.c_str(),"rb");
1180             if (pFile==NULL) perror ("Error opening file");
1181             else{
1182                 fseek (pFile, 0, SEEK_END);
1183                 size=ftell (pFile);
1184                 fclose (pFile);
1185             }
1186             
1187             qfileFilePos.push_back(size);
1188         }
1189         
1190         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1191             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1192                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1193                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1194                 }       
1195                 if(qfilename == "")     {       qLines = lines; } //files with duds
1196                 
1197                 return processors;
1198                 
1199                 #else
1200             
1201         if (processors == 1) { //save time
1202                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1203                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1204             lines.push_back(linePair(0, 1000));
1205             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1206         }else{
1207             int numFastaSeqs = 0;
1208             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1209             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1210         
1211             if (qfilename != "") { 
1212                 int numQualSeqs = 0;
1213                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1214                 
1215                 if (numFastaSeqs != numQualSeqs) {
1216                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1217                 }
1218             }
1219         
1220             //figure out how many sequences you have to process
1221             int numSeqsPerProcessor = numFastaSeqs / processors;
1222             for (int i = 0; i < processors; i++) {
1223                 int startIndex =  i * numSeqsPerProcessor;
1224                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1225                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1226                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1227             }
1228         }
1229             if(qfilename == "") {       qLines = lines; } //files with duds
1230                         return 1;
1231                 
1232                 #endif
1233         }
1234         catch(exception& e) {
1235                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1236                 exit(1);
1237         }
1238 }
1239
1240 //***************************************************************************************************************
1241
1242 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1243         try {
1244                 ifstream inOligos;
1245                 m->openInputFile(oligoFile, inOligos);
1246                 
1247                 ofstream test;
1248                 
1249                 string type, oligo, group;
1250
1251                 int indexPrimer = 0;
1252                 int indexBarcode = 0;
1253                 
1254                 while(!inOligos.eof()){
1255
1256                         inOligos >> type; 
1257                                         
1258                         if(type[0] == '#'){
1259                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1260                                 m->gobble(inOligos);
1261                         }
1262                         else{
1263                                 m->gobble(inOligos);
1264                                 //make type case insensitive
1265                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1266                                 
1267                                 inOligos >> oligo;
1268                                 
1269                                 for(int i=0;i<oligo.length();i++){
1270                                         oligo[i] = toupper(oligo[i]);
1271                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1272                                 }
1273                                 
1274                                 if(type == "FORWARD"){
1275                                         group = "";
1276                                         
1277                                         // get rest of line in case there is a primer name
1278                                         while (!inOligos.eof()) {       
1279                                                 char c = inOligos.get(); 
1280                                                 if (c == 10 || c == 13){        break;  }
1281                                                 else if (c == 32 || c == 9){;} //space or tab
1282                                                 else {  group += c;  }
1283                                         } 
1284                                         
1285                                         //check for repeat barcodes
1286                                         map<string, int>::iterator itPrime = primers.find(oligo);
1287                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1288                                         
1289                                         primers[oligo]=indexPrimer; indexPrimer++;              
1290                                         primerNameVector.push_back(group);
1291                                 }
1292                                 else if(type == "REVERSE"){
1293                                         //Sequence oligoRC("reverse", oligo);
1294                                         //oligoRC.reverseComplement();
1295                     string oligoRC = reverseOligo(oligo);
1296                                         revPrimer.push_back(oligoRC);
1297                                 }
1298                                 else if(type == "BARCODE"){
1299                                         inOligos >> group;
1300                     
1301                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1302                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1303                                         string temp = "";
1304                     while (!inOligos.eof())     {       
1305                                                 char c = inOligos.get(); 
1306                                                 if (c == 10 || c == 13){        break;  }
1307                                                 else if (c == 32 || c == 9){;} //space or tab
1308                                                 else {  temp += c;  }
1309                                         } 
1310                                         
1311                     //then this is illumina data with 4 columns
1312                     if (temp != "") {  
1313                         string reverseBarcode = reverseOligo(group); //reverse barcode
1314                         group = temp;
1315                         
1316                         //check for repeat barcodes
1317                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1318                         if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
1319                                                 
1320                         rbarcodes[reverseBarcode]=indexBarcode; 
1321                     }
1322                         
1323                                         //check for repeat barcodes
1324                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1325                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1326                                                 
1327                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1328                                         barcodeNameVector.push_back(group);
1329                                 }else if(type == "LINKER"){
1330                                         linker.push_back(oligo);
1331                                 }else if(type == "SPACER"){
1332                                         spacer.push_back(oligo);
1333                                 }
1334                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1335                         }
1336                         m->gobble(inOligos);
1337                 }       
1338                 inOligos.close();
1339                 
1340                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1341                 
1342                 //add in potential combos
1343                 if(barcodeNameVector.size() == 0){
1344                         barcodes[""] = 0;
1345                         barcodeNameVector.push_back("");                        
1346                 }
1347                 
1348                 if(primerNameVector.size() == 0){
1349                         primers[""] = 0;
1350                         primerNameVector.push_back("");                 
1351                 }
1352                 
1353                 fastaFileNames.resize(barcodeNameVector.size());
1354                 for(int i=0;i<fastaFileNames.size();i++){
1355                         fastaFileNames[i].assign(primerNameVector.size(), "");
1356                 }
1357                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1358                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1359                 
1360                 if(allFiles){
1361                         set<string> uniqueNames; //used to cleanup outputFileNames
1362                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1363                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1364                                         
1365                                         string primerName = primerNameVector[itPrimer->second];
1366                                         string barcodeName = barcodeNameVector[itBar->second];
1367                                         
1368                                         string comboGroupName = "";
1369                                         string fastaFileName = "";
1370                                         string qualFileName = "";
1371                                         string nameFileName = "";
1372                                         
1373                                         if(primerName == ""){
1374                                                 comboGroupName = barcodeNameVector[itBar->second];
1375                                         }
1376                                         else{
1377                                                 if(barcodeName == ""){
1378                                                         comboGroupName = primerNameVector[itPrimer->second];
1379                                                 }
1380                                                 else{
1381                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1382                                                 }
1383                                         }
1384                                         
1385                                         
1386                                         ofstream temp;
1387                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1388                                         if (uniqueNames.count(fastaFileName) == 0) {
1389                                                 outputNames.push_back(fastaFileName);
1390                                                 outputTypes["fasta"].push_back(fastaFileName);
1391                                                 uniqueNames.insert(fastaFileName);
1392                                         }
1393                                         
1394                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1395                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1396                                         
1397                                         if(qFileName != ""){
1398                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1399                                                 if (uniqueNames.count(qualFileName) == 0) {
1400                                                         outputNames.push_back(qualFileName);
1401                                                         outputTypes["qfile"].push_back(qualFileName);
1402                                                 }
1403                                                 
1404                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1405                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1406                                         }
1407                                         
1408                                         if(nameFile != ""){
1409                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1410                                                 if (uniqueNames.count(nameFileName) == 0) {
1411                                                         outputNames.push_back(nameFileName);
1412                                                         outputTypes["name"].push_back(nameFileName);
1413                                                 }
1414                                                 
1415                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1416                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1417                                         }
1418                                         
1419                                 }
1420                         }
1421                 }
1422                 numFPrimers = primers.size();
1423                 numRPrimers = revPrimer.size();
1424         numLinkers = linker.size();
1425         numSpacers = spacer.size();
1426                 
1427                 bool allBlank = true;
1428                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1429                         if (barcodeNameVector[i] != "") {
1430                                 allBlank = false;
1431                                 break;
1432                         }
1433                 }
1434                 for (int i = 0; i < primerNameVector.size(); i++) {
1435                         if (primerNameVector[i] != "") {
1436                                 allBlank = false;
1437                                 break;
1438                         }
1439                 }
1440                 
1441                 if (allBlank) {
1442                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1443                         allFiles = false;
1444                         return false;
1445                 }
1446                 
1447                 return true;
1448                 
1449         }
1450         catch(exception& e) {
1451                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1452                 exit(1);
1453         }
1454 }
1455 //***************************************************************************************************************
1456
1457 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1458         try {
1459                 bool success = 1;
1460                 if(qscores.getName() != ""){
1461                         qscores.trimQScores(-1, keepFirst);
1462                 }
1463                 sequence.trim(keepFirst);
1464                 return success;
1465         }
1466         catch(exception& e) {
1467                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1468                 exit(1);
1469         }
1470         
1471 }       
1472
1473 //***************************************************************************************************************
1474
1475 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1476         try {
1477                 bool success = 0;
1478                 
1479                 int length = sequence.getNumBases() - removeLast;
1480                 
1481                 if(length > 0){
1482                         if(qscores.getName() != ""){
1483                                 qscores.trimQScores(-1, length);
1484                         }
1485                         sequence.trim(length);
1486                         success = 1;
1487                 }
1488                 else{
1489                         success = 0;
1490                 }
1491
1492                 return success;
1493         }
1494         catch(exception& e) {
1495                 m->errorOut(e, "removeLastTrim", "countDiffs");
1496                 exit(1);
1497         }
1498         
1499 }       
1500
1501 //***************************************************************************************************************
1502
1503 bool TrimSeqsCommand::cullLength(Sequence& seq){
1504         try {
1505         
1506                 int length = seq.getNumBases();
1507                 bool success = 0;       //guilty until proven innocent
1508                 
1509                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1510                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1511                 else                                                                                            {       success = 0;    }
1512                 
1513                 return success;
1514         
1515         }
1516         catch(exception& e) {
1517                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1518                 exit(1);
1519         }
1520         
1521 }
1522
1523 //***************************************************************************************************************
1524
1525 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1526         try {
1527                 int longHomoP = seq.getLongHomoPolymer();
1528                 bool success = 0;       //guilty until proven innocent
1529                 
1530                 if(longHomoP <= maxHomoP){      success = 1;    }
1531                 else                                    {       success = 0;    }
1532                 
1533                 return success;
1534         }
1535         catch(exception& e) {
1536                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1537                 exit(1);
1538         }
1539         
1540 }
1541 //********************************************************************/
1542 string TrimSeqsCommand::reverseOligo(string oligo){
1543         try {
1544         string reverse = "";
1545         
1546         for(int i=oligo.length()-1;i>=0;i--){
1547             
1548             if(oligo[i] == 'A')         {       reverse += 'T'; }
1549             else if(oligo[i] == 'T'){   reverse += 'A'; }
1550             else if(oligo[i] == 'U'){   reverse += 'A'; }
1551             
1552             else if(oligo[i] == 'G'){   reverse += 'C'; }
1553             else if(oligo[i] == 'C'){   reverse += 'G'; }
1554             
1555             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1556             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1557             
1558             else if(oligo[i] == 'M'){   reverse += 'K'; }
1559             else if(oligo[i] == 'K'){   reverse += 'M'; }
1560             
1561             else if(oligo[i] == 'W'){   reverse += 'W'; }
1562             else if(oligo[i] == 'S'){   reverse += 'S'; }
1563             
1564             else if(oligo[i] == 'B'){   reverse += 'V'; }
1565             else if(oligo[i] == 'V'){   reverse += 'B'; }
1566             
1567             else if(oligo[i] == 'D'){   reverse += 'H'; }
1568             else if(oligo[i] == 'H'){   reverse += 'D'; }
1569             
1570             else                                                {       reverse += 'N'; }
1571         }
1572         
1573         
1574         return reverse;
1575     }
1576         catch(exception& e) {
1577                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1578                 exit(1);
1579         }
1580 }
1581
1582 //***************************************************************************************************************
1583
1584 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1585         try {
1586                 int numNs = seq.getAmbigBases();
1587                 bool success = 0;       //guilty until proven innocent
1588                 
1589                 if(numNs <= maxAmbig)   {       success = 1;    }
1590                 else                                    {       success = 0;    }
1591                 
1592                 return success;
1593         }
1594         catch(exception& e) {
1595                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1596                 exit(1);
1597         }
1598         
1599 }
1600 //***************************************************************************************************************