]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "qfile")            {   outputFileName =  "qual";   }
111             else if (type == "fasta")            {   outputFileName =  "fasta";   }
112             else if (type == "group")            {   outputFileName =  "groups";   }
113             else if (type == "name")            {   outputFileName =  "names";   }
114             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
115         }
116         return outputFileName;
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
120                 exit(1);
121         }
122 }
123
124
125 //**********************************************************************************************************************
126
127 TrimSeqsCommand::TrimSeqsCommand(){     
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133                 outputTypes["qfile"] = tempOutNames;
134                 outputTypes["group"] = tempOutNames;
135                 outputTypes["name"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174                         
175                         //if the user changes the input directory command factory will send this info to us in the output parameter 
176                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
177                         if (inputDir == "not found"){   inputDir = "";          }
178                         else {
179                                 string path;
180                                 it = parameters.find("fasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("oligos");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
194                                 }
195                                 
196                                 it = parameters.find("qfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
202                                 }
203                                 
204                                 it = parameters.find("name");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
210                                 }
211                                 
212                         }
213
214                         
215                         //check for required parameters
216                         fastaFile = validParameter.validFile(parameters, "fasta", true);
217                         if (fastaFile == "not found") {                                 
218                                 fastaFile = m->getFastaFile(); 
219                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
220                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
221                         }else if (fastaFile == "not open") { abort = true; }    
222                         else { m->setFastaFile(fastaFile); }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
226                                 outputDir = ""; 
227                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
228                         }
229                 
230                         
231                         //check for optional parameter and set defaults
232                         // ...at some point should added some additional type checking...
233                         string temp;
234                         temp = validParameter.validFile(parameters, "flip", false);
235                         if (temp == "not found")    {   flip = 0;       }
236                         else {  flip = m->isTrue(temp);         }
237                 
238                         temp = validParameter.validFile(parameters, "oligos", true);
239                         if (temp == "not found"){       oligoFile = "";         }
240                         else if(temp == "not open"){    abort = true;   } 
241                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
242                         
243                         
244                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
245                         m->mothurConvert(temp, maxAmbig);  
246
247                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
248                         m->mothurConvert(temp, maxHomoP);  
249
250                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, minLength); 
252                         
253                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
254                         m->mothurConvert(temp, maxLength);
255                         
256                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, bdiffs);
258                         
259                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, pdiffs);
261             
262             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, ldiffs);
264             
265             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, sdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
269                         m->mothurConvert(temp, tdiffs);
270                         
271                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
272                         
273                         temp = validParameter.validFile(parameters, "qfile", true);     
274                         if (temp == "not found")        {       qFileName = "";         }
275                         else if(temp == "not open")     {       abort = true;           }
276                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
277                         
278                         temp = validParameter.validFile(parameters, "name", true);      
279                         if (temp == "not found")        {       nameFile = "";          }
280                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
281                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
282                         
283                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
284                         m->mothurConvert(temp, qThreshold);
285                         
286                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
287                         qtrim = m->isTrue(temp);
288
289                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
290                         convert(temp, qRollAverage);
291
292                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
293                         convert(temp, qWindowAverage);
294
295                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
296                         convert(temp, qWindowSize);
297
298                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
299                         convert(temp, qWindowStep);
300
301                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
302                         convert(temp, qAverage);
303
304                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
305                         convert(temp, keepFirst);
306
307                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
308                         convert(temp, removeLast);
309                         
310                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
311                         allFiles = m->isTrue(temp);
312             
313             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
314                         keepforward = m->isTrue(temp);
315                         
316                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
317                         m->setProcessors(temp);
318                         m->mothurConvert(temp, processors); 
319                         
320                         
321                         if(allFiles && (oligoFile == "")){
322                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
323                         }
324                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
325                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
326                                 qAverage=0;
327                                 qThreshold=0;
328                         }
329                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
330                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
331                                 abort = true;
332                         }
333                         
334                         if (nameFile == "") {
335                                 vector<string> files; files.push_back(fastaFile);
336                                 parser.getNameFile(files);
337                         }
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
343                 exit(1);
344         }
345 }
346 //***************************************************************************************************************
347
348 int TrimSeqsCommand::execute(){
349         try{
350         
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 numFPrimers = 0;  //this needs to be initialized
354                 numRPrimers = 0;
355         numSpacers = 0;
356         numLinkers = 0;
357                 createGroup = false;
358                 vector<vector<string> > fastaFileNames;
359                 vector<vector<string> > qualFileNames;
360                 vector<vector<string> > nameFileNames;
361                 
362                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
363                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
364                 
365                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
366                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
367                 
368                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
369                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
370                 
371                 if (qFileName != "") {
372                         outputNames.push_back(trimQualFile);
373                         outputNames.push_back(scrapQualFile);
374                         outputTypes["qfile"].push_back(trimQualFile);
375                         outputTypes["qfile"].push_back(scrapQualFile); 
376                 }
377                 
378                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
379                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
380                 
381                 if (nameFile != "") {
382                         m->readNames(nameFile, nameMap);
383                         outputNames.push_back(trimNameFile);
384                         outputNames.push_back(scrapNameFile);
385                         outputTypes["name"].push_back(trimNameFile);
386                         outputTypes["name"].push_back(scrapNameFile); 
387                 }
388                 
389                 if (m->control_pressed) { return 0; }
390                 
391                 string outputGroupFileName;
392                 if(oligoFile != ""){
393                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
394                         if (createGroup) {
395                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
396                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
397                         }
398                 }
399        
400         //fills lines and qlines
401                 setLines(fastaFile, qFileName);
402                 
403         if(processors == 1){
404             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
405         }else{
406             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
407         }       
408                 
409                 
410                 if (m->control_pressed) {  return 0; }                  
411         
412                 if(allFiles){
413                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
414                         map<string, string>::iterator it;
415                         set<string> namesToRemove;
416                         for(int i=0;i<fastaFileNames.size();i++){
417                                 for(int j=0;j<fastaFileNames[0].size();j++){
418                                         if (fastaFileNames[i][j] != "") {
419                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
420                                                         if(m->isBlank(fastaFileNames[i][j])){
421                                                                 m->mothurRemove(fastaFileNames[i][j]);
422                                                                 namesToRemove.insert(fastaFileNames[i][j]);
423                                                         
424                                                                 if(qFileName != ""){
425                                                                         m->mothurRemove(qualFileNames[i][j]);
426                                                                         namesToRemove.insert(qualFileNames[i][j]);
427                                                                 }
428                                                                 
429                                                                 if(nameFile != ""){
430                                                                         m->mothurRemove(nameFileNames[i][j]);
431                                                                         namesToRemove.insert(nameFileNames[i][j]);
432                                                                 }
433                                                         }else{  
434                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
435                                                                 if (it == uniqueFastaNames.end()) {     
436                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
437                                                                 }       
438                                                         }
439                                                 }
440                                         }
441                                 }
442                         }
443                         
444                         //remove names for outputFileNames, just cleans up the output
445                         vector<string> outputNames2;
446                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
447                         outputNames = outputNames2;
448                         
449                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
450                                 ifstream in;
451                                 m->openInputFile(it->first, in);
452                                 
453                                 ofstream out;
454                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
455                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
456                                 m->openOutputFile(thisGroupName, out);
457                                 
458                                 while (!in.eof()){
459                                         if (m->control_pressed) { break; }
460                                         
461                                         Sequence currSeq(in); m->gobble(in);
462                                         out << currSeq.getName() << '\t' << it->second << endl;
463                     
464                     if (nameFile != "") {
465                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
466                         if (itName != nameMap.end()) { 
467                             vector<string> thisSeqsNames; 
468                             m->splitAtChar(itName->second, thisSeqsNames, ',');
469                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
470                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
471                             }
472                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
473                     }
474                                 }
475                                 in.close();
476                                 out.close();
477                         }
478                 }
479                 
480                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
481
482                 //output group counts
483                 m->mothurOutEndLine();
484                 int total = 0;
485                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
486                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
487                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
488                 }
489                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
490                 
491                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
492
493                 //set fasta file as new current fastafile
494                 string current = "";
495                 itTypes = outputTypes.find("fasta");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
498                 }
499                 
500                 itTypes = outputTypes.find("name");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
503                 }
504                 
505                 itTypes = outputTypes.find("qfile");
506                 if (itTypes != outputTypes.end()) {
507                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
508                 }
509                 
510                 itTypes = outputTypes.find("group");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
513                 }
514
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
518                 m->mothurOutEndLine();
519                 
520                 return 0;       
521                         
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "TrimSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528                 
529 /**************************************************************************************/
530
531 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
532                 
533         try {
534                 
535                 ofstream trimFASTAFile;
536                 m->openOutputFile(trimFileName, trimFASTAFile);
537                 
538                 ofstream scrapFASTAFile;
539                 m->openOutputFile(scrapFileName, scrapFASTAFile);
540                 
541                 ofstream trimQualFile;
542                 ofstream scrapQualFile;
543                 if(qFileName != ""){
544                         m->openOutputFile(trimQFileName, trimQualFile);
545                         m->openOutputFile(scrapQFileName, scrapQualFile);
546                 }
547                 
548                 ofstream trimNameFile;
549                 ofstream scrapNameFile;
550                 if(nameFile != ""){
551                         m->openOutputFile(trimNFileName, trimNameFile);
552                         m->openOutputFile(scrapNFileName, scrapNameFile);
553                 }
554                 
555                 
556                 ofstream outGroupsFile;
557                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
558                 if(allFiles){
559                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
560                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
561                                         if (fastaFileNames[i][j] != "") {
562                                                 ofstream temp;
563                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
564                                                 if(qFileName != ""){
565                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
566                                                 }
567                                                 
568                                                 if(nameFile != ""){
569                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
570                                                 }
571                                         }
572                                 }
573                         }
574                 }
575                 
576                 ifstream inFASTA;
577                 m->openInputFile(filename, inFASTA);
578                 inFASTA.seekg(line.start);
579                 
580                 ifstream qFile;
581                 if(qFileName != "")     {
582                         m->openInputFile(qFileName, qFile);
583                         qFile.seekg(qline.start);  
584                 }
585                 
586                 int count = 0;
587                 bool moreSeqs = 1;
588                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
589         
590                 while (moreSeqs) {
591                                 
592                         if (m->control_pressed) { 
593                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
594                                 if (createGroup) {       outGroupsFile.close();   }
595
596                                 if(qFileName != ""){
597                                         qFile.close();
598                                 }
599                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
600
601                                 return 0;
602                         }
603                         
604                         int success = 1;
605                         string trashCode = "";
606                         int currentSeqsDiffs = 0;
607
608                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
609                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
610                         QualityScores currQual;
611                         if(qFileName != ""){
612                                 currQual = QualityScores(qFile);  m->gobble(qFile);
613                         }
614                         
615                         string origSeq = currSeq.getUnaligned();
616                         if (origSeq != "") {
617                                 
618                                 int barcodeIndex = 0;
619                                 int primerIndex = 0;
620                                 
621                 if(numLinkers != 0){
622                                         success = trimOligos.stripLinker(currSeq, currQual);
623                                         if(success > ldiffs)            {       trashCode += 'k';       }
624                                         else{ currentSeqsDiffs += success;  }
625
626                                 }
627                 
628                                 if(barcodes.size() != 0){
629                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
630                                         if(success > bdiffs)            {       trashCode += 'b';       }
631                                         else{ currentSeqsDiffs += success;  }
632                                 }
633                 
634                 if(rbarcodes.size() != 0){
635                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
636                                         if(success > bdiffs)            {       trashCode += 'b';       }
637                                         else{ currentSeqsDiffs += success;  }
638                                 }
639                                 
640                 if(numSpacers != 0){
641                                         success = trimOligos.stripSpacer(currSeq, currQual);
642                                         if(success > sdiffs)            {       trashCode += 's';       }
643                                         else{ currentSeqsDiffs += success;  }
644
645                                 }
646                 
647                                 if(numFPrimers != 0){
648                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
649                                         if(success > pdiffs)            {       trashCode += 'f';       }
650                                         else{ currentSeqsDiffs += success;  }
651                                 }
652                                 
653                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
654                                 
655                                 if(numRPrimers != 0){
656                                         success = trimOligos.stripReverse(currSeq, currQual);
657                                         if(!success)                            {       trashCode += 'r';       }
658                                 }
659
660                                 if(keepFirst != 0){
661                                         success = keepFirstTrim(currSeq, currQual);
662                                 }
663                                 
664                                 if(removeLast != 0){
665                                         success = removeLastTrim(currSeq, currQual);
666                                         if(!success)                            {       trashCode += 'l';       }
667                                 }
668
669                                 
670                                 if(qFileName != ""){
671                                         int origLength = currSeq.getNumBases();
672                                         
673                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
674                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
675                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
676                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
677                                         else                                            {       success = 1;                            }
678                                         
679                                         //you don't want to trim, if it fails above then scrap it
680                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
681                                         
682                                         if(!success)                            {       trashCode += 'q';       }
683                                 }                               
684                 
685                                 if(minLength > 0 || maxLength > 0){
686                                         success = cullLength(currSeq);
687                                         if(!success)                            {       trashCode += 'l';       }
688                                 }
689                                 if(maxHomoP > 0){
690                                         success = cullHomoP(currSeq);
691                                         if(!success)                            {       trashCode += 'h';       }
692                                 }
693                                 if(maxAmbig != -1){
694                                         success = cullAmbigs(currSeq);
695                                         if(!success)                            {       trashCode += 'n';       }
696                                 }
697                                 
698                                 if(flip){               // should go last                       
699                                         currSeq.reverseComplement();
700                                         if(qFileName != ""){
701                                                 currQual.flipQScores(); 
702                                         }
703                                 }
704                                 
705                                 if(trashCode.length() == 0){
706                                         currSeq.setAligned(currSeq.getUnaligned());
707                                         currSeq.printSequence(trimFASTAFile);
708                                         
709                                         if(qFileName != ""){
710                                                 currQual.printQScores(trimQualFile);
711                                         }
712                                         
713                     
714                                         if(nameFile != ""){
715                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
716                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
717                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
718                                         }
719                                         
720                                         if (createGroup) {
721                                                 if(barcodes.size() != 0){
722                                                         string thisGroup = barcodeNameVector[barcodeIndex];
723                                                         if (primers.size() != 0) { 
724                                                                 if (primerNameVector[primerIndex] != "") { 
725                                                                         if(thisGroup != "") {
726                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
727                                                                         }else {
728                                                                                 thisGroup = primerNameVector[primerIndex]; 
729                                                                         }
730                                                                 } 
731                                                         }
732                                                         
733                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
734                                                         
735                             int numRedundants = 0;
736                                                         if (nameFile != "") {
737                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
738                                                                 if (itName != nameMap.end()) { 
739                                                                         vector<string> thisSeqsNames; 
740                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
741                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
742                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
743                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
744                                                                         }
745                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
746                                                         }
747                                                         
748                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
749                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
750                                                         else { groupCounts[it->first] += (1 + numRedundants); }
751                                                                 
752                                                 }
753                                         }
754                                         
755                                         if(allFiles){
756                                                 ofstream output;
757                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
758                                                 currSeq.printSequence(output);
759                                                 output.close();
760                                                 
761                                                 if(qFileName != ""){
762                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
763                                                         currQual.printQScores(output);
764                                                         output.close();                                                 
765                                                 }
766                                                 
767                                                 if(nameFile != ""){
768                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
769                                                         if (itName != nameMap.end()) { 
770                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
771                                                                 output << itName->first << '\t' << itName->second << endl; 
772                                                                 output.close();
773                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
774                                                 }
775                                         }
776                                 }
777                                 else{
778                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
779                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
780                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
781                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
782                                         }
783                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
784                                         currSeq.setUnaligned(origSeq);
785                                         currSeq.setAligned(origSeq);
786                                         currSeq.printSequence(scrapFASTAFile);
787                                         if(qFileName != ""){
788                                                 currQual.printQScores(scrapQualFile);
789                                         }
790                                 }
791                                 count++;
792                         }
793                         
794                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
795                                 unsigned long long pos = inFASTA.tellg();
796                                 if ((pos == -1) || (pos >= line.end)) { break; }
797                         
798                         #else
799                                 if (inFASTA.eof()) { break; }
800                         #endif
801                         
802                         //report progress
803                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
804                         
805                 }
806                 //report progress
807                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
808                 
809                 
810                 inFASTA.close();
811                 trimFASTAFile.close();
812                 scrapFASTAFile.close();
813                 if (createGroup) {       outGroupsFile.close();   }
814                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
815                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
816                 
817                 return count;
818         }
819         catch(exception& e) {
820                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
821                 exit(1);
822         }
823 }
824
825 /**************************************************************************************************/
826
827 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
828         try {
829         
830         int process = 1;
831                 int exitCommand = 1;
832                 processIDS.clear();
833                 
834 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
835                                 //loop through and create all the processes you want
836                 while (process != processors) {
837                         int pid = fork();
838                         
839                         if (pid > 0) {
840                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
841                                 process++;
842                         }else if (pid == 0){
843                                 
844                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
845                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
846                                 vector<vector<string> > tempNameFileNames = nameFileNames;
847
848                                 if(allFiles){
849                                         ofstream temp;
850
851                                         for(int i=0;i<tempFASTAFileNames.size();i++){
852                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
853                                                         if (tempFASTAFileNames[i][j] != "") {
854                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
855                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
856
857                                                                 if(qFileName != ""){
858                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
859                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
860                                                                 }
861                                                                 if(nameFile != ""){
862                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
863                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
864                                                                 }
865                                                         }
866                                                 }
867                                         }
868                                 }
869                                                         
870                                 driverCreateTrim(filename,
871                                                                  qFileName,
872                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
873                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
874                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
875                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
876                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
877                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
878                                                                  (groupFile + toString(getpid()) + ".temp"),
879                                                                  tempFASTAFileNames,
880                                                                  tempPrimerQualFileNames,
881                                                                  tempNameFileNames,
882                                                                  lines[process],
883                                                                  qLines[process]);
884                                 
885                                 //pass groupCounts to parent
886                                 if(createGroup){
887                                         ofstream out;
888                                         string tempFile = filename + toString(getpid()) + ".num.temp";
889                                         m->openOutputFile(tempFile, out);
890                                         
891                                         out << groupCounts.size() << endl;
892                                         
893                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
894                                                 out << it->first << '\t' << it->second << endl;
895                                         }
896                                         out.close();
897                                 }
898                                 exit(0);
899                         }else { 
900                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
901                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
902                                 exit(0);
903                         }
904                 }
905                 
906                 //parent do my part
907                 ofstream temp;
908                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
909                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
910                 if(qFileName != ""){
911                         m->openOutputFile(trimQualFileName, temp);              temp.close();
912                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
913                 }
914                 if (nameFile != "") {
915                         m->openOutputFile(trimNameFileName, temp);              temp.close();
916                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
917                 }
918
919                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
920                 
921                 //force parent to wait until all the processes are done
922                 for (int i=0;i<processIDS.size();i++) { 
923                         int temp = processIDS[i];
924                         wait(&temp);
925                 }
926 #else
927         //////////////////////////////////////////////////////////////////////////////////////////////////////
928                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
929                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
930                 //////////////////////////////////////////////////////////////////////////////////////////////////////
931                 
932                 vector<trimData*> pDataArray; 
933                 DWORD   dwThreadIdArray[processors-1];
934                 HANDLE  hThreadArray[processors-1]; 
935                 
936                 //Create processor worker threads.
937                 for( int i=0; i<processors-1; i++){
938                         
939             string extension = "";
940                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
941             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
942             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
943             vector<vector<string> > tempNameFileNames = nameFileNames;
944             
945             if(allFiles){
946                 ofstream temp;
947                 
948                 for(int i=0;i<tempFASTAFileNames.size();i++){
949                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
950                         if (tempFASTAFileNames[i][j] != "") {
951                             tempFASTAFileNames[i][j] += extension;
952                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
953                             
954                             if(qFileName != ""){
955                                 tempPrimerQualFileNames[i][j] += extension;
956                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
957                             }
958                             if(nameFile != ""){
959                                 tempNameFileNames[i][j] += extension;
960                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
961                             }
962                         }
963                     }
964                 }
965             }
966
967             
968                         trimData* tempTrim = new trimData(filename,
969                                               qFileName, nameFile,
970                                               (trimFASTAFileName+extension),
971                                               (scrapFASTAFileName+extension),
972                                               (trimQualFileName+extension),
973                                               (scrapQualFileName+extension),
974                                               (trimNameFileName+extension),
975                                               (scrapNameFileName+extension),
976                                               (groupFile+extension),
977                                               tempFASTAFileNames,
978                                               tempPrimerQualFileNames,
979                                               tempNameFileNames,
980                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
981                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
982                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
983                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
984                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
985                         pDataArray.push_back(tempTrim);
986             
987                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
988                 }
989         
990         //parent do my part
991                 ofstream temp;
992                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
993                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
994                 if(qFileName != ""){
995                         m->openOutputFile(trimQualFileName, temp);              temp.close();
996                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
997                 }
998                 if (nameFile != "") {
999                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1000                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1001                 }
1002         
1003                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1004         processIDS.push_back(processors-1);
1005
1006         
1007                 //Wait until all threads have terminated.
1008                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1009                 
1010                 //Close all thread handles and free memory allocations.
1011                 for(int i=0; i < pDataArray.size(); i++){
1012                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1013                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1014                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1015                 else { groupCounts[it->first] += it->second; }
1016             }
1017             CloseHandle(hThreadArray[i]);
1018                         delete pDataArray[i];
1019                 }
1020         
1021 #endif          
1022         
1023         
1024         //append files
1025                 for(int i=0;i<processIDS.size();i++){
1026                         
1027                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1028                         
1029                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1030                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1031                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1032                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1033                         
1034                         if(qFileName != ""){
1035                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1036                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1037                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1038                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1039                         }
1040                         
1041                         if(nameFile != ""){
1042                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1043                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1044                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1045                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1046                         }
1047                         
1048                         if(createGroup){
1049                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1050                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1051                         }
1052                         
1053                         
1054                         if(allFiles){
1055                                 for(int j=0;j<fastaFileNames.size();j++){
1056                                         for(int k=0;k<fastaFileNames[j].size();k++){
1057                                                 if (fastaFileNames[j][k] != "") {
1058                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1059                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1060                                                         
1061                                                         if(qFileName != ""){
1062                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1063                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1064                                                         }
1065                                                         
1066                                                         if(nameFile != ""){
1067                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1068                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1069                                                         }
1070                                                 }
1071                                         }
1072                                 }
1073                         }
1074                         
1075             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1076                         if(createGroup){
1077                                 ifstream in;
1078                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1079                                 m->openInputFile(tempFile, in);
1080                                 int tempNum;
1081                                 string group;
1082                                 
1083                                 in >> tempNum; m->gobble(in);
1084                                 
1085                                 if (tempNum != 0) {
1086                                         while (!in.eof()) { 
1087                                                 in >> group >> tempNum; m->gobble(in);
1088                         
1089                                                 map<string, int>::iterator it = groupCounts.find(group);
1090                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1091                                                 else { groupCounts[it->first] += tempNum; }
1092                                         }
1093                                 }
1094                                 in.close(); m->mothurRemove(tempFile);
1095                         }
1096             #endif
1097                 }
1098
1099         return exitCommand;
1100         }
1101         catch(exception& e) {
1102                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1103                 exit(1);
1104         }
1105 }
1106
1107 /**************************************************************************************************/
1108
1109 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1110         try {
1111         
1112         vector<unsigned long long> fastaFilePos;
1113                 vector<unsigned long long> qfileFilePos;
1114                 
1115                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1116                 //set file positions for fasta file
1117                 fastaFilePos = m->divideFile(filename, processors);
1118                 
1119                 //get name of first sequence in each chunk
1120                 map<string, int> firstSeqNames;
1121                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1122                         ifstream in;
1123                         m->openInputFile(filename, in);
1124                         in.seekg(fastaFilePos[i]);
1125                 
1126                         Sequence temp(in); 
1127                         firstSeqNames[temp.getName()] = i;
1128                 
1129                         in.close();
1130                 }
1131                 
1132                 if(qfilename != "")     {
1133             //seach for filePos of each first name in the qfile and save in qfileFilePos
1134             ifstream inQual;
1135             m->openInputFile(qfilename, inQual);
1136             
1137             string input;
1138             while(!inQual.eof()){       
1139                 input = m->getline(inQual);
1140                 
1141                 if (input.length() != 0) {
1142                     if(input[0] == '>'){ //this is a sequence name line
1143                         istringstream nameStream(input);
1144                         
1145                         string sname = "";  nameStream >> sname;
1146                         sname = sname.substr(1);
1147                         
1148                         map<string, int>::iterator it = firstSeqNames.find(sname);
1149                         
1150                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1151                             unsigned long long pos = inQual.tellg(); 
1152                             qfileFilePos.push_back(pos - input.length() - 1);   
1153                             firstSeqNames.erase(it);
1154                         }
1155                     }
1156                 }
1157                 
1158                 if (firstSeqNames.size() == 0) { break; }
1159             }
1160             inQual.close();
1161             
1162             
1163             if (firstSeqNames.size() != 0) { 
1164                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1165                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1166                 }
1167                 qFileName = "";
1168                 return processors;
1169             }
1170             
1171             //get last file position of qfile
1172             FILE * pFile;
1173             unsigned long long size;
1174             
1175             //get num bytes in file
1176             pFile = fopen (qfilename.c_str(),"rb");
1177             if (pFile==NULL) perror ("Error opening file");
1178             else{
1179                 fseek (pFile, 0, SEEK_END);
1180                 size=ftell (pFile);
1181                 fclose (pFile);
1182             }
1183             
1184             qfileFilePos.push_back(size);
1185         }
1186         
1187         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1188                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1189                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1190                 }       
1191                 if(qfilename == "")     {       qLines = lines; } //files with duds
1192                 
1193                 return processors;
1194                 
1195                 #else
1196             
1197         if (processors == 1) { //save time
1198                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1199                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1200             lines.push_back(linePair(0, 1000));
1201             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1202         }else{
1203             int numFastaSeqs = 0;
1204             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1205             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1206         
1207             if (qfilename != "") { 
1208                 int numQualSeqs = 0;
1209                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1210                 
1211                 if (numFastaSeqs != numQualSeqs) {
1212                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1213                 }
1214             }
1215         
1216             //figure out how many sequences you have to process
1217             int numSeqsPerProcessor = numFastaSeqs / processors;
1218             for (int i = 0; i < processors; i++) {
1219                 int startIndex =  i * numSeqsPerProcessor;
1220                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1221                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1222                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1223             }
1224         }
1225             if(qfilename == "") {       qLines = lines; } //files with duds
1226                         return 1;
1227                 
1228                 #endif
1229         }
1230         catch(exception& e) {
1231                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1232                 exit(1);
1233         }
1234 }
1235
1236 //***************************************************************************************************************
1237
1238 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1239         try {
1240                 ifstream inOligos;
1241                 m->openInputFile(oligoFile, inOligos);
1242                 
1243                 ofstream test;
1244                 
1245                 string type, oligo, group;
1246
1247                 int indexPrimer = 0;
1248                 int indexBarcode = 0;
1249                 
1250                 while(!inOligos.eof()){
1251
1252                         inOligos >> type; 
1253                                         
1254                         if(type[0] == '#'){
1255                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1256                                 m->gobble(inOligos);
1257                         }
1258                         else{
1259                                 m->gobble(inOligos);
1260                                 //make type case insensitive
1261                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1262                                 
1263                                 inOligos >> oligo;
1264                                 
1265                                 for(int i=0;i<oligo.length();i++){
1266                                         oligo[i] = toupper(oligo[i]);
1267                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1268                                 }
1269                                 
1270                                 if(type == "FORWARD"){
1271                                         group = "";
1272                                         
1273                                         // get rest of line in case there is a primer name
1274                                         while (!inOligos.eof()) {       
1275                                                 char c = inOligos.get(); 
1276                                                 if (c == 10 || c == 13){        break;  }
1277                                                 else if (c == 32 || c == 9){;} //space or tab
1278                                                 else {  group += c;  }
1279                                         } 
1280                                         
1281                                         //check for repeat barcodes
1282                                         map<string, int>::iterator itPrime = primers.find(oligo);
1283                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1284                                         
1285                                         primers[oligo]=indexPrimer; indexPrimer++;              
1286                                         primerNameVector.push_back(group);
1287                                 }
1288                                 else if(type == "REVERSE"){
1289                                         //Sequence oligoRC("reverse", oligo);
1290                                         //oligoRC.reverseComplement();
1291                     string oligoRC = reverseOligo(oligo);
1292                                         revPrimer.push_back(oligoRC);
1293                                 }
1294                                 else if(type == "BARCODE"){
1295                                         inOligos >> group;
1296                     
1297                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1298                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1299                                         string temp = "";
1300                     while (!inOligos.eof())     {       
1301                                                 char c = inOligos.get(); 
1302                                                 if (c == 10 || c == 13){        break;  }
1303                                                 else if (c == 32 || c == 9){;} //space or tab
1304                                                 else {  temp += c;  }
1305                                         } 
1306                                         
1307                     //then this is illumina data with 4 columns
1308                     if (temp != "") {  
1309                         string reverseBarcode = reverseOligo(group); //reverse barcode
1310                         group = temp;
1311                         
1312                         //check for repeat barcodes
1313                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1314                         if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
1315                                                 
1316                         rbarcodes[reverseBarcode]=indexBarcode; 
1317                     }
1318                         
1319                                         //check for repeat barcodes
1320                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1321                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1322                                                 
1323                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1324                                         barcodeNameVector.push_back(group);
1325                                 }else if(type == "LINKER"){
1326                                         linker.push_back(oligo);
1327                                 }else if(type == "SPACER"){
1328                                         spacer.push_back(oligo);
1329                                 }
1330                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1331                         }
1332                         m->gobble(inOligos);
1333                 }       
1334                 inOligos.close();
1335                 
1336                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1337                 
1338                 //add in potential combos
1339                 if(barcodeNameVector.size() == 0){
1340                         barcodes[""] = 0;
1341                         barcodeNameVector.push_back("");                        
1342                 }
1343                 
1344                 if(primerNameVector.size() == 0){
1345                         primers[""] = 0;
1346                         primerNameVector.push_back("");                 
1347                 }
1348                 
1349                 fastaFileNames.resize(barcodeNameVector.size());
1350                 for(int i=0;i<fastaFileNames.size();i++){
1351                         fastaFileNames[i].assign(primerNameVector.size(), "");
1352                 }
1353                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1354                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1355                 
1356                 if(allFiles){
1357                         set<string> uniqueNames; //used to cleanup outputFileNames
1358                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1359                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1360                                         
1361                                         string primerName = primerNameVector[itPrimer->second];
1362                                         string barcodeName = barcodeNameVector[itBar->second];
1363                                         
1364                                         string comboGroupName = "";
1365                                         string fastaFileName = "";
1366                                         string qualFileName = "";
1367                                         string nameFileName = "";
1368                                         
1369                                         if(primerName == ""){
1370                                                 comboGroupName = barcodeNameVector[itBar->second];
1371                                         }
1372                                         else{
1373                                                 if(barcodeName == ""){
1374                                                         comboGroupName = primerNameVector[itPrimer->second];
1375                                                 }
1376                                                 else{
1377                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1378                                                 }
1379                                         }
1380                                         
1381                                         
1382                                         ofstream temp;
1383                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1384                                         if (uniqueNames.count(fastaFileName) == 0) {
1385                                                 outputNames.push_back(fastaFileName);
1386                                                 outputTypes["fasta"].push_back(fastaFileName);
1387                                                 uniqueNames.insert(fastaFileName);
1388                                         }
1389                                         
1390                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1391                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1392                                         
1393                                         if(qFileName != ""){
1394                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1395                                                 if (uniqueNames.count(qualFileName) == 0) {
1396                                                         outputNames.push_back(qualFileName);
1397                                                         outputTypes["qfile"].push_back(qualFileName);
1398                                                 }
1399                                                 
1400                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1401                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1402                                         }
1403                                         
1404                                         if(nameFile != ""){
1405                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1406                                                 if (uniqueNames.count(nameFileName) == 0) {
1407                                                         outputNames.push_back(nameFileName);
1408                                                         outputTypes["name"].push_back(nameFileName);
1409                                                 }
1410                                                 
1411                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1412                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1413                                         }
1414                                         
1415                                 }
1416                         }
1417                 }
1418                 numFPrimers = primers.size();
1419                 numRPrimers = revPrimer.size();
1420         numLinkers = linker.size();
1421         numSpacers = spacer.size();
1422                 
1423                 bool allBlank = true;
1424                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1425                         if (barcodeNameVector[i] != "") {
1426                                 allBlank = false;
1427                                 break;
1428                         }
1429                 }
1430                 for (int i = 0; i < primerNameVector.size(); i++) {
1431                         if (primerNameVector[i] != "") {
1432                                 allBlank = false;
1433                                 break;
1434                         }
1435                 }
1436                 
1437                 if (allBlank) {
1438                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1439                         allFiles = false;
1440                         return false;
1441                 }
1442                 
1443                 return true;
1444                 
1445         }
1446         catch(exception& e) {
1447                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1448                 exit(1);
1449         }
1450 }
1451 //***************************************************************************************************************
1452
1453 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1454         try {
1455                 bool success = 1;
1456                 if(qscores.getName() != ""){
1457                         qscores.trimQScores(-1, keepFirst);
1458                 }
1459                 sequence.trim(keepFirst);
1460                 return success;
1461         }
1462         catch(exception& e) {
1463                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1464                 exit(1);
1465         }
1466         
1467 }       
1468
1469 //***************************************************************************************************************
1470
1471 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1472         try {
1473                 bool success = 0;
1474                 
1475                 int length = sequence.getNumBases() - removeLast;
1476                 
1477                 if(length > 0){
1478                         if(qscores.getName() != ""){
1479                                 qscores.trimQScores(-1, length);
1480                         }
1481                         sequence.trim(length);
1482                         success = 1;
1483                 }
1484                 else{
1485                         success = 0;
1486                 }
1487
1488                 return success;
1489         }
1490         catch(exception& e) {
1491                 m->errorOut(e, "removeLastTrim", "countDiffs");
1492                 exit(1);
1493         }
1494         
1495 }       
1496
1497 //***************************************************************************************************************
1498
1499 bool TrimSeqsCommand::cullLength(Sequence& seq){
1500         try {
1501         
1502                 int length = seq.getNumBases();
1503                 bool success = 0;       //guilty until proven innocent
1504                 
1505                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1506                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1507                 else                                                                                            {       success = 0;    }
1508                 
1509                 return success;
1510         
1511         }
1512         catch(exception& e) {
1513                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1514                 exit(1);
1515         }
1516         
1517 }
1518
1519 //***************************************************************************************************************
1520
1521 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1522         try {
1523                 int longHomoP = seq.getLongHomoPolymer();
1524                 bool success = 0;       //guilty until proven innocent
1525                 
1526                 if(longHomoP <= maxHomoP){      success = 1;    }
1527                 else                                    {       success = 0;    }
1528                 
1529                 return success;
1530         }
1531         catch(exception& e) {
1532                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1533                 exit(1);
1534         }
1535         
1536 }
1537 //********************************************************************/
1538 string TrimSeqsCommand::reverseOligo(string oligo){
1539         try {
1540         string reverse = "";
1541         
1542         for(int i=oligo.length()-1;i>=0;i--){
1543             
1544             if(oligo[i] == 'A')         {       reverse += 'T'; }
1545             else if(oligo[i] == 'T'){   reverse += 'A'; }
1546             else if(oligo[i] == 'U'){   reverse += 'A'; }
1547             
1548             else if(oligo[i] == 'G'){   reverse += 'C'; }
1549             else if(oligo[i] == 'C'){   reverse += 'G'; }
1550             
1551             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1552             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1553             
1554             else if(oligo[i] == 'M'){   reverse += 'K'; }
1555             else if(oligo[i] == 'K'){   reverse += 'M'; }
1556             
1557             else if(oligo[i] == 'W'){   reverse += 'W'; }
1558             else if(oligo[i] == 'S'){   reverse += 'S'; }
1559             
1560             else if(oligo[i] == 'B'){   reverse += 'V'; }
1561             else if(oligo[i] == 'V'){   reverse += 'B'; }
1562             
1563             else if(oligo[i] == 'D'){   reverse += 'H'; }
1564             else if(oligo[i] == 'H'){   reverse += 'D'; }
1565             
1566             else                                                {       reverse += 'N'; }
1567         }
1568         
1569         
1570         return reverse;
1571     }
1572         catch(exception& e) {
1573                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1574                 exit(1);
1575         }
1576 }
1577
1578 //***************************************************************************************************************
1579
1580 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1581         try {
1582                 int numNs = seq.getAmbigBases();
1583                 bool success = 0;       //guilty until proven innocent
1584                 
1585                 if(numNs <= maxAmbig)   {       success = 1;    }
1586                 else                                    {       success = 0;    }
1587                 
1588                 return success;
1589         }
1590         catch(exception& e) {
1591                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1592                 exit(1);
1593         }
1594         
1595 }
1596 //***************************************************************************************************************