]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
changed reading of name file to use buffered reads. note the splitAtWhiteSpace functi...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332         numSpacers = 0;
333         numLinkers = 0;
334                 createGroup = false;
335                 vector<vector<string> > fastaFileNames;
336                 vector<vector<string> > qualFileNames;
337                 vector<vector<string> > nameFileNames;
338                 
339                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
340                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
341                 
342                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
343                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
344                 
345                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
346                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
347                 
348                 if (qFileName != "") {
349                         outputNames.push_back(trimQualFile);
350                         outputNames.push_back(scrapQualFile);
351                         outputTypes["qfile"].push_back(trimQualFile);
352                         outputTypes["qfile"].push_back(scrapQualFile); 
353                 }
354                 
355                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
356                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
357                 
358                 if (nameFile != "") {
359                         m->readNames(nameFile, nameMap);
360                         outputNames.push_back(trimNameFile);
361                         outputNames.push_back(scrapNameFile);
362                         outputTypes["name"].push_back(trimNameFile);
363                         outputTypes["name"].push_back(scrapNameFile); 
364                 }
365                 
366                 if (m->control_pressed) { return 0; }
367                 
368                 string outputGroupFileName;
369                 if(oligoFile != ""){
370                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
371                         if (createGroup) {
372                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
373                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
374                         }
375                 }
376        
377         //fills lines and qlines
378                 setLines(fastaFile, qFileName);
379                 
380         if(processors == 1){
381             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
382         }else{
383             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
384         }       
385                 
386                 
387                 if (m->control_pressed) {  return 0; }                  
388         
389                 if(allFiles){
390                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391                         map<string, string>::iterator it;
392                         set<string> namesToRemove;
393                         for(int i=0;i<fastaFileNames.size();i++){
394                                 for(int j=0;j<fastaFileNames[0].size();j++){
395                                         if (fastaFileNames[i][j] != "") {
396                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
397                                                         if(m->isBlank(fastaFileNames[i][j])){
398                                                                 m->mothurRemove(fastaFileNames[i][j]);
399                                                                 namesToRemove.insert(fastaFileNames[i][j]);
400                                                         
401                                                                 if(qFileName != ""){
402                                                                         m->mothurRemove(qualFileNames[i][j]);
403                                                                         namesToRemove.insert(qualFileNames[i][j]);
404                                                                 }
405                                                                 
406                                                                 if(nameFile != ""){
407                                                                         m->mothurRemove(nameFileNames[i][j]);
408                                                                         namesToRemove.insert(nameFileNames[i][j]);
409                                                                 }
410                                                         }else{  
411                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
412                                                                 if (it == uniqueFastaNames.end()) {     
413                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
414                                                                 }       
415                                                         }
416                                                 }
417                                         }
418                                 }
419                         }
420                         
421                         //remove names for outputFileNames, just cleans up the output
422                         vector<string> outputNames2;
423                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
424                         outputNames = outputNames2;
425                         
426                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
427                                 ifstream in;
428                                 m->openInputFile(it->first, in);
429                                 
430                                 ofstream out;
431                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
432                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
433                                 m->openOutputFile(thisGroupName, out);
434                                 
435                                 while (!in.eof()){
436                                         if (m->control_pressed) { break; }
437                                         
438                                         Sequence currSeq(in); m->gobble(in);
439                                         out << currSeq.getName() << '\t' << it->second << endl;
440                     
441                     if (nameFile != "") {
442                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
443                         if (itName != nameMap.end()) { 
444                             vector<string> thisSeqsNames; 
445                             m->splitAtChar(itName->second, thisSeqsNames, ',');
446                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
447                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
448                             }
449                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
450                     }
451                                 }
452                                 in.close();
453                                 out.close();
454                         }
455                 }
456                 
457                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
458
459                 //output group counts
460                 m->mothurOutEndLine();
461                 int total = 0;
462                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
463                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
464                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
465                 }
466                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
467                 
468                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
469
470                 //set fasta file as new current fastafile
471                 string current = "";
472                 itTypes = outputTypes.find("fasta");
473                 if (itTypes != outputTypes.end()) {
474                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
475                 }
476                 
477                 itTypes = outputTypes.find("name");
478                 if (itTypes != outputTypes.end()) {
479                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
480                 }
481                 
482                 itTypes = outputTypes.find("qfile");
483                 if (itTypes != outputTypes.end()) {
484                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
485                 }
486                 
487                 itTypes = outputTypes.find("group");
488                 if (itTypes != outputTypes.end()) {
489                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
490                 }
491
492                 m->mothurOutEndLine();
493                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
494                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
495                 m->mothurOutEndLine();
496                 
497                 return 0;       
498                         
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "TrimSeqsCommand", "execute");
502                 exit(1);
503         }
504 }
505                 
506 /**************************************************************************************/
507
508 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
509                 
510         try {
511                 
512                 ofstream trimFASTAFile;
513                 m->openOutputFile(trimFileName, trimFASTAFile);
514                 
515                 ofstream scrapFASTAFile;
516                 m->openOutputFile(scrapFileName, scrapFASTAFile);
517                 
518                 ofstream trimQualFile;
519                 ofstream scrapQualFile;
520                 if(qFileName != ""){
521                         m->openOutputFile(trimQFileName, trimQualFile);
522                         m->openOutputFile(scrapQFileName, scrapQualFile);
523                 }
524                 
525                 ofstream trimNameFile;
526                 ofstream scrapNameFile;
527                 if(nameFile != ""){
528                         m->openOutputFile(trimNFileName, trimNameFile);
529                         m->openOutputFile(scrapNFileName, scrapNameFile);
530                 }
531                 
532                 
533                 ofstream outGroupsFile;
534                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
535                 if(allFiles){
536                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
537                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
538                                         if (fastaFileNames[i][j] != "") {
539                                                 ofstream temp;
540                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
541                                                 if(qFileName != ""){
542                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
543                                                 }
544                                                 
545                                                 if(nameFile != ""){
546                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
547                                                 }
548                                         }
549                                 }
550                         }
551                 }
552                 
553                 ifstream inFASTA;
554                 m->openInputFile(filename, inFASTA);
555                 inFASTA.seekg(line.start);
556                 
557                 ifstream qFile;
558                 if(qFileName != "")     {
559                         m->openInputFile(qFileName, qFile);
560                         qFile.seekg(qline.start);  
561                 }
562                 
563                 int count = 0;
564                 bool moreSeqs = 1;
565                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
566         
567                 while (moreSeqs) {
568                                 
569                         if (m->control_pressed) { 
570                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
571                                 if (createGroup) {       outGroupsFile.close();   }
572
573                                 if(qFileName != ""){
574                                         qFile.close();
575                                 }
576                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
577
578                                 return 0;
579                         }
580                         
581                         int success = 1;
582                         string trashCode = "";
583                         int currentSeqsDiffs = 0;
584
585                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
586                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
587                         QualityScores currQual;
588                         if(qFileName != ""){
589                                 currQual = QualityScores(qFile);  m->gobble(qFile);
590                         }
591                         
592                         string origSeq = currSeq.getUnaligned();
593                         if (origSeq != "") {
594                                 
595                                 int barcodeIndex = 0;
596                                 int primerIndex = 0;
597                                 
598                 if(numLinkers != 0){
599                                         success = trimOligos.stripLinker(currSeq, currQual);
600                                         if(success > ldiffs)            {       trashCode += 'k';       }
601                                         else{ currentSeqsDiffs += success;  }
602
603                                 }
604                 
605                                 if(barcodes.size() != 0){
606                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
607                                         if(success > bdiffs)            {       trashCode += 'b';       }
608                                         else{ currentSeqsDiffs += success;  }
609                                 }
610                 
611                 if(rbarcodes.size() != 0){
612                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
613                                         if(success > bdiffs)            {       trashCode += 'b';       }
614                                         else{ currentSeqsDiffs += success;  }
615                                 }
616                                 
617                 if(numSpacers != 0){
618                                         success = trimOligos.stripSpacer(currSeq, currQual);
619                                         if(success > sdiffs)            {       trashCode += 's';       }
620                                         else{ currentSeqsDiffs += success;  }
621
622                                 }
623                 
624                                 if(numFPrimers != 0){
625                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
626                                         if(success > pdiffs)            {       trashCode += 'f';       }
627                                         else{ currentSeqsDiffs += success;  }
628                                 }
629                                 
630                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
631                                 
632                                 if(numRPrimers != 0){
633                                         success = trimOligos.stripReverse(currSeq, currQual);
634                                         if(!success)                            {       trashCode += 'r';       }
635                                 }
636
637                                 if(keepFirst != 0){
638                                         success = keepFirstTrim(currSeq, currQual);
639                                 }
640                                 
641                                 if(removeLast != 0){
642                                         success = removeLastTrim(currSeq, currQual);
643                                         if(!success)                            {       trashCode += 'l';       }
644                                 }
645
646                                 
647                                 if(qFileName != ""){
648                                         int origLength = currSeq.getNumBases();
649                                         
650                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
651                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
652                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
653                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
654                                         else                                            {       success = 1;                            }
655                                         
656                                         //you don't want to trim, if it fails above then scrap it
657                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
658                                         
659                                         if(!success)                            {       trashCode += 'q';       }
660                                 }                               
661                 
662                                 if(minLength > 0 || maxLength > 0){
663                                         success = cullLength(currSeq);
664                                         if(!success)                            {       trashCode += 'l';       }
665                                 }
666                                 if(maxHomoP > 0){
667                                         success = cullHomoP(currSeq);
668                                         if(!success)                            {       trashCode += 'h';       }
669                                 }
670                                 if(maxAmbig != -1){
671                                         success = cullAmbigs(currSeq);
672                                         if(!success)                            {       trashCode += 'n';       }
673                                 }
674                                 
675                                 if(flip){               // should go last                       
676                                         currSeq.reverseComplement();
677                                         if(qFileName != ""){
678                                                 currQual.flipQScores(); 
679                                         }
680                                 }
681                                 
682                                 if(trashCode.length() == 0){
683                                         currSeq.setAligned(currSeq.getUnaligned());
684                                         currSeq.printSequence(trimFASTAFile);
685                                         
686                                         if(qFileName != ""){
687                                                 currQual.printQScores(trimQualFile);
688                                         }
689                                         
690                     
691                                         if(nameFile != ""){
692                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
693                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
694                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
695                                         }
696                                         
697                                         if (createGroup) {
698                                                 if(barcodes.size() != 0){
699                                                         string thisGroup = barcodeNameVector[barcodeIndex];
700                                                         if (primers.size() != 0) { 
701                                                                 if (primerNameVector[primerIndex] != "") { 
702                                                                         if(thisGroup != "") {
703                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
704                                                                         }else {
705                                                                                 thisGroup = primerNameVector[primerIndex]; 
706                                                                         }
707                                                                 } 
708                                                         }
709                                                         
710                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
711                                                         
712                             int numRedundants = 0;
713                                                         if (nameFile != "") {
714                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
715                                                                 if (itName != nameMap.end()) { 
716                                                                         vector<string> thisSeqsNames; 
717                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
718                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
719                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
720                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
721                                                                         }
722                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
723                                                         }
724                                                         
725                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
726                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
727                                                         else { groupCounts[it->first] += (1 + numRedundants); }
728                                                                 
729                                                 }
730                                         }
731                                         
732                                         if(allFiles){
733                                                 ofstream output;
734                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
735                                                 currSeq.printSequence(output);
736                                                 output.close();
737                                                 
738                                                 if(qFileName != ""){
739                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
740                                                         currQual.printQScores(output);
741                                                         output.close();                                                 
742                                                 }
743                                                 
744                                                 if(nameFile != ""){
745                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
746                                                         if (itName != nameMap.end()) { 
747                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
748                                                                 output << itName->first << '\t' << itName->second << endl; 
749                                                                 output.close();
750                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
751                                                 }
752                                         }
753                                 }
754                                 else{
755                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
756                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
757                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
758                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
759                                         }
760                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
761                                         currSeq.setUnaligned(origSeq);
762                                         currSeq.setAligned(origSeq);
763                                         currSeq.printSequence(scrapFASTAFile);
764                                         if(qFileName != ""){
765                                                 currQual.printQScores(scrapQualFile);
766                                         }
767                                 }
768                                 count++;
769                         }
770                         
771                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
772                                 unsigned long long pos = inFASTA.tellg();
773                                 if ((pos == -1) || (pos >= line.end)) { break; }
774                         
775                         #else
776                                 if (inFASTA.eof()) { break; }
777                         #endif
778                         
779                         //report progress
780                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
781                         
782                 }
783                 //report progress
784                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
785                 
786                 
787                 inFASTA.close();
788                 trimFASTAFile.close();
789                 scrapFASTAFile.close();
790                 if (createGroup) {       outGroupsFile.close();   }
791                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
792                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
793                 
794                 return count;
795         }
796         catch(exception& e) {
797                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
798                 exit(1);
799         }
800 }
801
802 /**************************************************************************************************/
803
804 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
805         try {
806         
807         int process = 1;
808                 int exitCommand = 1;
809                 processIDS.clear();
810                 
811 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
812                                 //loop through and create all the processes you want
813                 while (process != processors) {
814                         int pid = fork();
815                         
816                         if (pid > 0) {
817                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
818                                 process++;
819                         }else if (pid == 0){
820                                 
821                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
822                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
823                                 vector<vector<string> > tempNameFileNames = nameFileNames;
824
825                                 if(allFiles){
826                                         ofstream temp;
827
828                                         for(int i=0;i<tempFASTAFileNames.size();i++){
829                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
830                                                         if (tempFASTAFileNames[i][j] != "") {
831                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
832                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
833
834                                                                 if(qFileName != ""){
835                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
836                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
837                                                                 }
838                                                                 if(nameFile != ""){
839                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
840                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
841                                                                 }
842                                                         }
843                                                 }
844                                         }
845                                 }
846                                                         
847                                 driverCreateTrim(filename,
848                                                                  qFileName,
849                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
850                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
851                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
852                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
853                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
854                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
855                                                                  (groupFile + toString(getpid()) + ".temp"),
856                                                                  tempFASTAFileNames,
857                                                                  tempPrimerQualFileNames,
858                                                                  tempNameFileNames,
859                                                                  lines[process],
860                                                                  qLines[process]);
861                                 
862                                 //pass groupCounts to parent
863                                 if(createGroup){
864                                         ofstream out;
865                                         string tempFile = filename + toString(getpid()) + ".num.temp";
866                                         m->openOutputFile(tempFile, out);
867                                         
868                                         out << groupCounts.size() << endl;
869                                         
870                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
871                                                 out << it->first << '\t' << it->second << endl;
872                                         }
873                                         out.close();
874                                 }
875                                 exit(0);
876                         }else { 
877                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
878                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
879                                 exit(0);
880                         }
881                 }
882                 
883                 //parent do my part
884                 ofstream temp;
885                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
886                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
887                 if(qFileName != ""){
888                         m->openOutputFile(trimQualFileName, temp);              temp.close();
889                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
890                 }
891                 if (nameFile != "") {
892                         m->openOutputFile(trimNameFileName, temp);              temp.close();
893                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
894                 }
895
896                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
897                 
898                 //force parent to wait until all the processes are done
899                 for (int i=0;i<processIDS.size();i++) { 
900                         int temp = processIDS[i];
901                         wait(&temp);
902                 }
903 #else
904         //////////////////////////////////////////////////////////////////////////////////////////////////////
905                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
906                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
907                 //////////////////////////////////////////////////////////////////////////////////////////////////////
908                 
909                 vector<trimData*> pDataArray; 
910                 DWORD   dwThreadIdArray[processors-1];
911                 HANDLE  hThreadArray[processors-1]; 
912                 
913                 //Create processor worker threads.
914                 for( int i=0; i<processors-1; i++){
915                         
916             string extension = "";
917                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
918             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
919             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
920             vector<vector<string> > tempNameFileNames = nameFileNames;
921             
922             if(allFiles){
923                 ofstream temp;
924                 
925                 for(int i=0;i<tempFASTAFileNames.size();i++){
926                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
927                         if (tempFASTAFileNames[i][j] != "") {
928                             tempFASTAFileNames[i][j] += extension;
929                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
930                             
931                             if(qFileName != ""){
932                                 tempPrimerQualFileNames[i][j] += extension;
933                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
934                             }
935                             if(nameFile != ""){
936                                 tempNameFileNames[i][j] += extension;
937                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
938                             }
939                         }
940                     }
941                 }
942             }
943
944             
945                         trimData* tempTrim = new trimData(filename,
946                                               qFileName, nameFile,
947                                               (trimFASTAFileName+extension),
948                                               (scrapFASTAFileName+extension),
949                                               (trimQualFileName+extension),
950                                               (scrapQualFileName+extension),
951                                               (trimNameFileName+extension),
952                                               (scrapNameFileName+extension),
953                                               (groupFile+extension),
954                                               tempFASTAFileNames,
955                                               tempPrimerQualFileNames,
956                                               tempNameFileNames,
957                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
958                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
959                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
960                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
961                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
962                         pDataArray.push_back(tempTrim);
963             
964                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
965                 }
966         
967         //parent do my part
968                 ofstream temp;
969                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
970                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
971                 if(qFileName != ""){
972                         m->openOutputFile(trimQualFileName, temp);              temp.close();
973                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
974                 }
975                 if (nameFile != "") {
976                         m->openOutputFile(trimNameFileName, temp);              temp.close();
977                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
978                 }
979         
980                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
981         processIDS.push_back(processors-1);
982
983         
984                 //Wait until all threads have terminated.
985                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
986                 
987                 //Close all thread handles and free memory allocations.
988                 for(int i=0; i < pDataArray.size(); i++){
989                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
990                 map<string, int>::iterator it2 = groupCounts.find(it->first);
991                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
992                 else { groupCounts[it->first] += it->second; }
993             }
994             CloseHandle(hThreadArray[i]);
995                         delete pDataArray[i];
996                 }
997         
998 #endif          
999         
1000         
1001         //append files
1002                 for(int i=0;i<processIDS.size();i++){
1003                         
1004                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1005                         
1006                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1007                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1008                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1009                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1010                         
1011                         if(qFileName != ""){
1012                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1013                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1014                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1015                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1016                         }
1017                         
1018                         if(nameFile != ""){
1019                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1020                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1021                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1022                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1023                         }
1024                         
1025                         if(createGroup){
1026                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1027                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1028                         }
1029                         
1030                         
1031                         if(allFiles){
1032                                 for(int j=0;j<fastaFileNames.size();j++){
1033                                         for(int k=0;k<fastaFileNames[j].size();k++){
1034                                                 if (fastaFileNames[j][k] != "") {
1035                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1036                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1037                                                         
1038                                                         if(qFileName != ""){
1039                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1040                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1041                                                         }
1042                                                         
1043                                                         if(nameFile != ""){
1044                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1045                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1046                                                         }
1047                                                 }
1048                                         }
1049                                 }
1050                         }
1051                         
1052             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1053                         if(createGroup){
1054                                 ifstream in;
1055                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1056                                 m->openInputFile(tempFile, in);
1057                                 int tempNum;
1058                                 string group;
1059                                 
1060                                 in >> tempNum; m->gobble(in);
1061                                 
1062                                 if (tempNum != 0) {
1063                                         while (!in.eof()) { 
1064                                                 in >> group >> tempNum; m->gobble(in);
1065                         
1066                                                 map<string, int>::iterator it = groupCounts.find(group);
1067                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1068                                                 else { groupCounts[it->first] += tempNum; }
1069                                         }
1070                                 }
1071                                 in.close(); m->mothurRemove(tempFile);
1072                         }
1073             #endif
1074                 }
1075
1076         return exitCommand;
1077         }
1078         catch(exception& e) {
1079                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1080                 exit(1);
1081         }
1082 }
1083
1084 /**************************************************************************************************/
1085
1086 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1087         try {
1088         
1089         vector<unsigned long long> fastaFilePos;
1090                 vector<unsigned long long> qfileFilePos;
1091                 
1092                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1093                 //set file positions for fasta file
1094                 fastaFilePos = m->divideFile(filename, processors);
1095                 
1096                 //get name of first sequence in each chunk
1097                 map<string, int> firstSeqNames;
1098                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1099                         ifstream in;
1100                         m->openInputFile(filename, in);
1101                         in.seekg(fastaFilePos[i]);
1102                 
1103                         Sequence temp(in); 
1104                         firstSeqNames[temp.getName()] = i;
1105                 
1106                         in.close();
1107                 }
1108                 
1109                 if(qfilename != "")     {
1110             //seach for filePos of each first name in the qfile and save in qfileFilePos
1111             ifstream inQual;
1112             m->openInputFile(qfilename, inQual);
1113             
1114             string input;
1115             while(!inQual.eof()){       
1116                 input = m->getline(inQual);
1117                 
1118                 if (input.length() != 0) {
1119                     if(input[0] == '>'){ //this is a sequence name line
1120                         istringstream nameStream(input);
1121                         
1122                         string sname = "";  nameStream >> sname;
1123                         sname = sname.substr(1);
1124                         
1125                         map<string, int>::iterator it = firstSeqNames.find(sname);
1126                         
1127                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1128                             unsigned long long pos = inQual.tellg(); 
1129                             qfileFilePos.push_back(pos - input.length() - 1);   
1130                             firstSeqNames.erase(it);
1131                         }
1132                     }
1133                 }
1134                 
1135                 if (firstSeqNames.size() == 0) { break; }
1136             }
1137             inQual.close();
1138             
1139             
1140             if (firstSeqNames.size() != 0) { 
1141                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1142                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1143                 }
1144                 qFileName = "";
1145                 return processors;
1146             }
1147             
1148             //get last file position of qfile
1149             FILE * pFile;
1150             unsigned long long size;
1151             
1152             //get num bytes in file
1153             pFile = fopen (qfilename.c_str(),"rb");
1154             if (pFile==NULL) perror ("Error opening file");
1155             else{
1156                 fseek (pFile, 0, SEEK_END);
1157                 size=ftell (pFile);
1158                 fclose (pFile);
1159             }
1160             
1161             qfileFilePos.push_back(size);
1162         }
1163         
1164         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1165                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1166                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1167                 }       
1168                 if(qfilename == "")     {       qLines = lines; } //files with duds
1169                 
1170                 return processors;
1171                 
1172                 #else
1173             
1174         if (processors == 1) { //save time
1175                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1176                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1177             lines.push_back(linePair(0, 1000));
1178             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1179         }else{
1180             int numFastaSeqs = 0;
1181             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1182             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1183         
1184             if (qfilename != "") { 
1185                 int numQualSeqs = 0;
1186                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1187                 
1188                 if (numFastaSeqs != numQualSeqs) {
1189                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1190                 }
1191             }
1192         
1193             //figure out how many sequences you have to process
1194             int numSeqsPerProcessor = numFastaSeqs / processors;
1195             for (int i = 0; i < processors; i++) {
1196                 int startIndex =  i * numSeqsPerProcessor;
1197                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1198                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1199                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1200             }
1201         }
1202             if(qfilename == "") {       qLines = lines; } //files with duds
1203                         return 1;
1204                 
1205                 #endif
1206         }
1207         catch(exception& e) {
1208                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1209                 exit(1);
1210         }
1211 }
1212
1213 //***************************************************************************************************************
1214
1215 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1216         try {
1217                 ifstream inOligos;
1218                 m->openInputFile(oligoFile, inOligos);
1219                 
1220                 ofstream test;
1221                 
1222                 string type, oligo, group;
1223
1224                 int indexPrimer = 0;
1225                 int indexBarcode = 0;
1226                 
1227                 while(!inOligos.eof()){
1228
1229                         inOligos >> type; 
1230                                         
1231                         if(type[0] == '#'){
1232                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1233                                 m->gobble(inOligos);
1234                         }
1235                         else{
1236                                 m->gobble(inOligos);
1237                                 //make type case insensitive
1238                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1239                                 
1240                                 inOligos >> oligo;
1241                                 
1242                                 for(int i=0;i<oligo.length();i++){
1243                                         oligo[i] = toupper(oligo[i]);
1244                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1245                                 }
1246                                 
1247                                 if(type == "FORWARD"){
1248                                         group = "";
1249                                         
1250                                         // get rest of line in case there is a primer name
1251                                         while (!inOligos.eof()) {       
1252                                                 char c = inOligos.get(); 
1253                                                 if (c == 10 || c == 13){        break;  }
1254                                                 else if (c == 32 || c == 9){;} //space or tab
1255                                                 else {  group += c;  }
1256                                         } 
1257                                         
1258                                         //check for repeat barcodes
1259                                         map<string, int>::iterator itPrime = primers.find(oligo);
1260                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1261                                         
1262                                         primers[oligo]=indexPrimer; indexPrimer++;              
1263                                         primerNameVector.push_back(group);
1264                                 }
1265                                 else if(type == "REVERSE"){
1266                                         //Sequence oligoRC("reverse", oligo);
1267                                         //oligoRC.reverseComplement();
1268                     string oligoRC = reverseOligo(oligo);
1269                                         revPrimer.push_back(oligoRC);
1270                                 }
1271                                 else if(type == "BARCODE"){
1272                                         inOligos >> group;
1273                     
1274                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1275                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1276                                         string temp = "";
1277                     while (!inOligos.eof())     {       
1278                                                 char c = inOligos.get(); 
1279                                                 if (c == 10 || c == 13){        break;  }
1280                                                 else if (c == 32 || c == 9){;} //space or tab
1281                                                 else {  temp += c;  }
1282                                         } 
1283                                         
1284                     //then this is illumina data with 4 columns
1285                     if (temp != "") {  
1286                         string reverseBarcode = reverseOligo(group); //reverse barcode
1287                         group = temp;
1288                         
1289                         //check for repeat barcodes
1290                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1291                         if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
1292                                                 
1293                         rbarcodes[reverseBarcode]=indexBarcode; 
1294                     }
1295                         
1296                                         //check for repeat barcodes
1297                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1298                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1299                                                 
1300                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1301                                         barcodeNameVector.push_back(group);
1302                                 }else if(type == "LINKER"){
1303                                         linker.push_back(oligo);
1304                                 }else if(type == "SPACER"){
1305                                         spacer.push_back(oligo);
1306                                 }
1307                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1308                         }
1309                         m->gobble(inOligos);
1310                 }       
1311                 inOligos.close();
1312                 
1313                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1314                 
1315                 //add in potential combos
1316                 if(barcodeNameVector.size() == 0){
1317                         barcodes[""] = 0;
1318                         barcodeNameVector.push_back("");                        
1319                 }
1320                 
1321                 if(primerNameVector.size() == 0){
1322                         primers[""] = 0;
1323                         primerNameVector.push_back("");                 
1324                 }
1325                 
1326                 fastaFileNames.resize(barcodeNameVector.size());
1327                 for(int i=0;i<fastaFileNames.size();i++){
1328                         fastaFileNames[i].assign(primerNameVector.size(), "");
1329                 }
1330                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1331                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1332                 
1333                 if(allFiles){
1334                         set<string> uniqueNames; //used to cleanup outputFileNames
1335                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1336                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1337                                         
1338                                         string primerName = primerNameVector[itPrimer->second];
1339                                         string barcodeName = barcodeNameVector[itBar->second];
1340                                         
1341                                         string comboGroupName = "";
1342                                         string fastaFileName = "";
1343                                         string qualFileName = "";
1344                                         string nameFileName = "";
1345                                         
1346                                         if(primerName == ""){
1347                                                 comboGroupName = barcodeNameVector[itBar->second];
1348                                         }
1349                                         else{
1350                                                 if(barcodeName == ""){
1351                                                         comboGroupName = primerNameVector[itPrimer->second];
1352                                                 }
1353                                                 else{
1354                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1355                                                 }
1356                                         }
1357                                         
1358                                         
1359                                         ofstream temp;
1360                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1361                                         if (uniqueNames.count(fastaFileName) == 0) {
1362                                                 outputNames.push_back(fastaFileName);
1363                                                 outputTypes["fasta"].push_back(fastaFileName);
1364                                                 uniqueNames.insert(fastaFileName);
1365                                         }
1366                                         
1367                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1368                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1369                                         
1370                                         if(qFileName != ""){
1371                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1372                                                 if (uniqueNames.count(qualFileName) == 0) {
1373                                                         outputNames.push_back(qualFileName);
1374                                                         outputTypes["qfile"].push_back(qualFileName);
1375                                                 }
1376                                                 
1377                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1378                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1379                                         }
1380                                         
1381                                         if(nameFile != ""){
1382                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1383                                                 if (uniqueNames.count(nameFileName) == 0) {
1384                                                         outputNames.push_back(nameFileName);
1385                                                         outputTypes["name"].push_back(nameFileName);
1386                                                 }
1387                                                 
1388                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1389                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1390                                         }
1391                                         
1392                                 }
1393                         }
1394                 }
1395                 numFPrimers = primers.size();
1396                 numRPrimers = revPrimer.size();
1397         numLinkers = linker.size();
1398         numSpacers = spacer.size();
1399                 
1400                 bool allBlank = true;
1401                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1402                         if (barcodeNameVector[i] != "") {
1403                                 allBlank = false;
1404                                 break;
1405                         }
1406                 }
1407                 for (int i = 0; i < primerNameVector.size(); i++) {
1408                         if (primerNameVector[i] != "") {
1409                                 allBlank = false;
1410                                 break;
1411                         }
1412                 }
1413                 
1414                 if (allBlank) {
1415                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1416                         allFiles = false;
1417                         return false;
1418                 }
1419                 
1420                 return true;
1421                 
1422         }
1423         catch(exception& e) {
1424                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1425                 exit(1);
1426         }
1427 }
1428 //***************************************************************************************************************
1429
1430 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1431         try {
1432                 bool success = 1;
1433                 if(qscores.getName() != ""){
1434                         qscores.trimQScores(-1, keepFirst);
1435                 }
1436                 sequence.trim(keepFirst);
1437                 return success;
1438         }
1439         catch(exception& e) {
1440                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1441                 exit(1);
1442         }
1443         
1444 }       
1445
1446 //***************************************************************************************************************
1447
1448 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1449         try {
1450                 bool success = 0;
1451                 
1452                 int length = sequence.getNumBases() - removeLast;
1453                 
1454                 if(length > 0){
1455                         if(qscores.getName() != ""){
1456                                 qscores.trimQScores(-1, length);
1457                         }
1458                         sequence.trim(length);
1459                         success = 1;
1460                 }
1461                 else{
1462                         success = 0;
1463                 }
1464
1465                 return success;
1466         }
1467         catch(exception& e) {
1468                 m->errorOut(e, "removeLastTrim", "countDiffs");
1469                 exit(1);
1470         }
1471         
1472 }       
1473
1474 //***************************************************************************************************************
1475
1476 bool TrimSeqsCommand::cullLength(Sequence& seq){
1477         try {
1478         
1479                 int length = seq.getNumBases();
1480                 bool success = 0;       //guilty until proven innocent
1481                 
1482                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1483                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1484                 else                                                                                            {       success = 0;    }
1485                 
1486                 return success;
1487         
1488         }
1489         catch(exception& e) {
1490                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1491                 exit(1);
1492         }
1493         
1494 }
1495
1496 //***************************************************************************************************************
1497
1498 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1499         try {
1500                 int longHomoP = seq.getLongHomoPolymer();
1501                 bool success = 0;       //guilty until proven innocent
1502                 
1503                 if(longHomoP <= maxHomoP){      success = 1;    }
1504                 else                                    {       success = 0;    }
1505                 
1506                 return success;
1507         }
1508         catch(exception& e) {
1509                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1510                 exit(1);
1511         }
1512         
1513 }
1514 //********************************************************************/
1515 string TrimSeqsCommand::reverseOligo(string oligo){
1516         try {
1517         string reverse = "";
1518         
1519         for(int i=oligo.length()-1;i>=0;i--){
1520             
1521             if(oligo[i] == 'A')         {       reverse += 'T'; }
1522             else if(oligo[i] == 'T'){   reverse += 'A'; }
1523             else if(oligo[i] == 'U'){   reverse += 'A'; }
1524             
1525             else if(oligo[i] == 'G'){   reverse += 'C'; }
1526             else if(oligo[i] == 'C'){   reverse += 'G'; }
1527             
1528             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1529             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1530             
1531             else if(oligo[i] == 'M'){   reverse += 'K'; }
1532             else if(oligo[i] == 'K'){   reverse += 'M'; }
1533             
1534             else if(oligo[i] == 'W'){   reverse += 'W'; }
1535             else if(oligo[i] == 'S'){   reverse += 'S'; }
1536             
1537             else if(oligo[i] == 'B'){   reverse += 'V'; }
1538             else if(oligo[i] == 'V'){   reverse += 'B'; }
1539             
1540             else if(oligo[i] == 'D'){   reverse += 'H'; }
1541             else if(oligo[i] == 'H'){   reverse += 'D'; }
1542             
1543             else                                                {       reverse += 'N'; }
1544         }
1545         
1546         
1547         return reverse;
1548     }
1549         catch(exception& e) {
1550                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1551                 exit(1);
1552         }
1553 }
1554
1555 //***************************************************************************************************************
1556
1557 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1558         try {
1559                 int numNs = seq.getAmbigBases();
1560                 bool success = 0;       //guilty until proven innocent
1561                 
1562                 if(numNs <= maxAmbig)   {       success = 1;    }
1563                 else                                    {       success = 0;    }
1564                 
1565                 return success;
1566         }
1567         catch(exception& e) {
1568                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1569                 exit(1);
1570         }
1571         
1572 }
1573 //***************************************************************************************************************