]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "qfile")            {   outputFileName =  "qual";   }
111             else if (type == "fasta")            {   outputFileName =  "fasta";   }
112             else if (type == "group")            {   outputFileName =  "groups";   }
113             else if (type == "name")            {   outputFileName =  "names";   }
114             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
115         }
116         return outputFileName;
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
120                 exit(1);
121         }
122 }
123
124
125 //**********************************************************************************************************************
126
127 TrimSeqsCommand::TrimSeqsCommand(){     
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133                 outputTypes["qfile"] = tempOutNames;
134                 outputTypes["group"] = tempOutNames;
135                 outputTypes["name"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174                         
175                         //if the user changes the input directory command factory will send this info to us in the output parameter 
176                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
177                         if (inputDir == "not found"){   inputDir = "";          }
178                         else {
179                                 string path;
180                                 it = parameters.find("fasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("oligos");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
194                                 }
195                                 
196                                 it = parameters.find("qfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
202                                 }
203                                 
204                                 it = parameters.find("name");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
210                                 }
211                                 
212                         }
213
214                         
215                         //check for required parameters
216                         fastaFile = validParameter.validFile(parameters, "fasta", true);
217                         if (fastaFile == "not found") {                                 
218                                 fastaFile = m->getFastaFile(); 
219                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
220                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
221                         }else if (fastaFile == "not open") { abort = true; }    
222                         else { m->setFastaFile(fastaFile); }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
226                                 outputDir = ""; 
227                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
228                         }
229                 
230                         
231                         //check for optional parameter and set defaults
232                         // ...at some point should added some additional type checking...
233                         string temp;
234                         temp = validParameter.validFile(parameters, "flip", false);
235                         if (temp == "not found")    {   flip = 0;       }
236                         else {  flip = m->isTrue(temp);         }
237                 
238                         temp = validParameter.validFile(parameters, "oligos", true);
239                         if (temp == "not found"){       oligoFile = "";         }
240                         else if(temp == "not open"){    abort = true;   } 
241                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
242                         
243                         
244                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
245                         m->mothurConvert(temp, maxAmbig);  
246
247                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
248                         m->mothurConvert(temp, maxHomoP);  
249
250                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, minLength); 
252                         
253                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
254                         m->mothurConvert(temp, maxLength);
255                         
256                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, bdiffs);
258                         
259                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, pdiffs);
261             
262             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, ldiffs);
264             
265             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, sdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
269                         m->mothurConvert(temp, tdiffs);
270                         
271                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
272                         
273                         temp = validParameter.validFile(parameters, "qfile", true);     
274                         if (temp == "not found")        {       qFileName = "";         }
275                         else if(temp == "not open")     {       abort = true;           }
276                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
277                         
278                         temp = validParameter.validFile(parameters, "name", true);      
279                         if (temp == "not found")        {       nameFile = "";          }
280                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
281                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
282                         
283                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
284                         m->mothurConvert(temp, qThreshold);
285                         
286                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
287                         qtrim = m->isTrue(temp);
288
289                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
290                         convert(temp, qRollAverage);
291
292                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
293                         convert(temp, qWindowAverage);
294
295                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
296                         convert(temp, qWindowSize);
297
298                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
299                         convert(temp, qWindowStep);
300
301                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
302                         convert(temp, qAverage);
303
304                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
305                         convert(temp, keepFirst);
306
307                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
308                         convert(temp, removeLast);
309                         
310                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
311                         allFiles = m->isTrue(temp);
312             
313             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
314                         keepforward = m->isTrue(temp);
315                         
316                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
317                         m->setProcessors(temp);
318                         m->mothurConvert(temp, processors); 
319                         
320                         
321                         if(allFiles && (oligoFile == "")){
322                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
323                         }
324                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
325                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
326                                 qAverage=0;
327                                 qThreshold=0;
328                         }
329                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
330                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
331                                 abort = true;
332                         }
333                         
334                         if (nameFile == "") {
335                                 vector<string> files; files.push_back(fastaFile);
336                                 parser.getNameFile(files);
337                         }
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
343                 exit(1);
344         }
345 }
346 //***************************************************************************************************************
347
348 int TrimSeqsCommand::execute(){
349         try{
350         
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 numFPrimers = 0;  //this needs to be initialized
354                 numRPrimers = 0;
355         numSpacers = 0;
356         numLinkers = 0;
357                 createGroup = false;
358                 vector<vector<string> > fastaFileNames;
359                 vector<vector<string> > qualFileNames;
360                 vector<vector<string> > nameFileNames;
361                 
362                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
363                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
364                 
365                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
366                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
367                 
368                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
369                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
370                 
371                 if (qFileName != "") {
372                         outputNames.push_back(trimQualFile);
373                         outputNames.push_back(scrapQualFile);
374                         outputTypes["qfile"].push_back(trimQualFile);
375                         outputTypes["qfile"].push_back(scrapQualFile); 
376                 }
377                 
378                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
379                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
380                 
381                 if (nameFile != "") {
382                         m->readNames(nameFile, nameMap);
383                         outputNames.push_back(trimNameFile);
384                         outputNames.push_back(scrapNameFile);
385                         outputTypes["name"].push_back(trimNameFile);
386                         outputTypes["name"].push_back(scrapNameFile); 
387                 }
388                 
389                 if (m->control_pressed) { return 0; }
390                 
391                 string outputGroupFileName;
392                 if(oligoFile != ""){
393                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
394                         if (createGroup) {
395                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
396                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
397                         }
398                 }
399        
400         //fills lines and qlines
401                 setLines(fastaFile, qFileName);
402                 
403         if(processors == 1){
404             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
405         }else{
406             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
407         }       
408                 
409                 
410                 if (m->control_pressed) {  return 0; }                  
411         
412                 if(allFiles){
413                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
414                         map<string, string>::iterator it;
415                         set<string> namesToRemove;
416                         for(int i=0;i<fastaFileNames.size();i++){
417                                 for(int j=0;j<fastaFileNames[0].size();j++){
418                                         if (fastaFileNames[i][j] != "") {
419                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
420                                                         if(m->isBlank(fastaFileNames[i][j])){
421                                                                 m->mothurRemove(fastaFileNames[i][j]);
422                                                                 namesToRemove.insert(fastaFileNames[i][j]);
423                                                         
424                                                                 if(qFileName != ""){
425                                                                         m->mothurRemove(qualFileNames[i][j]);
426                                                                         namesToRemove.insert(qualFileNames[i][j]);
427                                                                 }
428                                                                 
429                                                                 if(nameFile != ""){
430                                                                         m->mothurRemove(nameFileNames[i][j]);
431                                                                         namesToRemove.insert(nameFileNames[i][j]);
432                                                                 }
433                                                         }else{  
434                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
435                                                                 if (it == uniqueFastaNames.end()) {     
436                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
437                                                                 }       
438                                                         }
439                                                 }
440                                         }
441                                 }
442                         }
443                         
444                         //remove names for outputFileNames, just cleans up the output
445                         vector<string> outputNames2;
446                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
447                         outputNames = outputNames2;
448                         
449                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
450                                 ifstream in;
451                                 m->openInputFile(it->first, in);
452                                 
453                                 ofstream out;
454                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
455                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
456                                 m->openOutputFile(thisGroupName, out);
457                                 
458                                 while (!in.eof()){
459                                         if (m->control_pressed) { break; }
460                                         
461                                         Sequence currSeq(in); m->gobble(in);
462                                         out << currSeq.getName() << '\t' << it->second << endl;
463                     
464                     if (nameFile != "") {
465                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
466                         if (itName != nameMap.end()) { 
467                             vector<string> thisSeqsNames; 
468                             m->splitAtChar(itName->second, thisSeqsNames, ',');
469                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
470                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
471                             }
472                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
473                     }
474                                 }
475                                 in.close();
476                                 out.close();
477                         }
478                 }
479                 
480                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
481
482                 //output group counts
483                 m->mothurOutEndLine();
484                 int total = 0;
485                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
486                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
487                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
488                 }
489                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
490                 
491                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
492
493                 //set fasta file as new current fastafile
494                 string current = "";
495                 itTypes = outputTypes.find("fasta");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
498                 }
499                 
500                 itTypes = outputTypes.find("name");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
503                 }
504                 
505                 itTypes = outputTypes.find("qfile");
506                 if (itTypes != outputTypes.end()) {
507                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
508                 }
509                 
510                 itTypes = outputTypes.find("group");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
513                 }
514
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
518                 m->mothurOutEndLine();
519                 
520                 return 0;       
521                         
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "TrimSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528                 
529 /**************************************************************************************/
530
531 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
532                 
533         try {
534                 
535                 ofstream trimFASTAFile;
536                 m->openOutputFile(trimFileName, trimFASTAFile);
537                 
538                 ofstream scrapFASTAFile;
539                 m->openOutputFile(scrapFileName, scrapFASTAFile);
540                 
541                 ofstream trimQualFile;
542                 ofstream scrapQualFile;
543                 if(qFileName != ""){
544                         m->openOutputFile(trimQFileName, trimQualFile);
545                         m->openOutputFile(scrapQFileName, scrapQualFile);
546                 }
547                 
548                 ofstream trimNameFile;
549                 ofstream scrapNameFile;
550                 if(nameFile != ""){
551                         m->openOutputFile(trimNFileName, trimNameFile);
552                         m->openOutputFile(scrapNFileName, scrapNameFile);
553                 }
554                 
555                 
556                 ofstream outGroupsFile;
557                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
558                 if(allFiles){
559                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
560                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
561                                         if (fastaFileNames[i][j] != "") {
562                                                 ofstream temp;
563                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
564                                                 if(qFileName != ""){
565                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
566                                                 }
567                                                 
568                                                 if(nameFile != ""){
569                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
570                                                 }
571                                         }
572                                 }
573                         }
574                 }
575                 
576                 ifstream inFASTA;
577                 m->openInputFile(filename, inFASTA);
578                 inFASTA.seekg(line.start);
579                 
580                 ifstream qFile;
581                 if(qFileName != "")     {
582                         m->openInputFile(qFileName, qFile);
583                         qFile.seekg(qline.start);  
584                 }
585                 
586                 int count = 0;
587                 bool moreSeqs = 1;
588                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
589         
590                 while (moreSeqs) {
591                                 
592                         if (m->control_pressed) { 
593                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
594                                 if (createGroup) {       outGroupsFile.close();   }
595
596                                 if(qFileName != ""){
597                                         qFile.close();
598                                 }
599                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
600
601                                 return 0;
602                         }
603                         
604                         int success = 1;
605                         string trashCode = "";
606                         int currentSeqsDiffs = 0;
607
608                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
609                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
610             
611                         QualityScores currQual;
612                         if(qFileName != ""){
613                                 currQual = QualityScores(qFile);  m->gobble(qFile);
614                 if ((m->debug)&&(count>15800)) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); m->mothurOut("[DEBUG]: " + toString(getpid()) + '\n'); }
615                         }
616                         
617                         string origSeq = currSeq.getUnaligned();
618                         if (origSeq != "") {
619                                 
620                                 int barcodeIndex = 0;
621                                 int primerIndex = 0;
622                                 
623                 if(numLinkers != 0){
624                                         success = trimOligos.stripLinker(currSeq, currQual);
625                                         if(success > ldiffs)            {       trashCode += 'k';       }
626                                         else{ currentSeqsDiffs += success;  }
627
628                                 }
629                 
630                                 if(barcodes.size() != 0){
631                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
632                                         if(success > bdiffs)            {       trashCode += 'b';       }
633                                         else{ currentSeqsDiffs += success;  }
634                                 }
635                 
636                 if(rbarcodes.size() != 0){
637                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
638                                         if(success > bdiffs)            {       trashCode += 'b';       }
639                                         else{ currentSeqsDiffs += success;  }
640                                 }
641                                 
642                 if(numSpacers != 0){
643                                         success = trimOligos.stripSpacer(currSeq, currQual);
644                                         if(success > sdiffs)            {       trashCode += 's';       }
645                                         else{ currentSeqsDiffs += success;  }
646
647                                 }
648                 
649                                 if(numFPrimers != 0){
650                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
651                                         if(success > pdiffs)            {       trashCode += 'f';       }
652                                         else{ currentSeqsDiffs += success;  }
653                                 }
654                                 
655                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
656                                 
657                                 if(numRPrimers != 0){
658                                         success = trimOligos.stripReverse(currSeq, currQual);
659                                         if(!success)                            {       trashCode += 'r';       }
660                                 }
661
662                                 if(keepFirst != 0){
663                                         success = keepFirstTrim(currSeq, currQual);
664                                 }
665                                 
666                                 if(removeLast != 0){
667                                         success = removeLastTrim(currSeq, currQual);
668                                         if(!success)                            {       trashCode += 'l';       }
669                                 }
670
671                                 
672                                 if(qFileName != ""){
673                                         int origLength = currSeq.getNumBases();
674                                         
675                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
676                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
677                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
678                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
679                                         else                                            {       success = 1;                            }
680                                         
681                                         //you don't want to trim, if it fails above then scrap it
682                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
683                                         
684                                         if(!success)                            {       trashCode += 'q';       }
685                                 }                               
686                 
687                                 if(minLength > 0 || maxLength > 0){
688                                         success = cullLength(currSeq);
689                                         if(!success)                            {       trashCode += 'l';       }
690                                 }
691                                 if(maxHomoP > 0){
692                                         success = cullHomoP(currSeq);
693                                         if(!success)                            {       trashCode += 'h';       }
694                                 }
695                                 if(maxAmbig != -1){
696                                         success = cullAmbigs(currSeq);
697                                         if(!success)                            {       trashCode += 'n';       }
698                                 }
699                                 
700                                 if(flip){               // should go last                       
701                                         currSeq.reverseComplement();
702                                         if(qFileName != ""){
703                                                 currQual.flipQScores(); 
704                                         }
705                                 }
706                                 
707                                 if(trashCode.length() == 0){
708                                         currSeq.setAligned(currSeq.getUnaligned());
709                                         currSeq.printSequence(trimFASTAFile);
710                                         
711                                         if(qFileName != ""){
712                                                 currQual.printQScores(trimQualFile);
713                                         }
714                                         
715                     
716                                         if(nameFile != ""){
717                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
718                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
719                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
720                                         }
721                                         
722                                         if (createGroup) {
723                                                 if(barcodes.size() != 0){
724                                                         string thisGroup = barcodeNameVector[barcodeIndex];
725                                                         if (primers.size() != 0) { 
726                                                                 if (primerNameVector[primerIndex] != "") { 
727                                                                         if(thisGroup != "") {
728                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
729                                                                         }else {
730                                                                                 thisGroup = primerNameVector[primerIndex]; 
731                                                                         }
732                                                                 } 
733                                                         }
734                                                         
735                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
736                                                         
737                             int numRedundants = 0;
738                                                         if (nameFile != "") {
739                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
740                                                                 if (itName != nameMap.end()) { 
741                                                                         vector<string> thisSeqsNames; 
742                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
743                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
744                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
745                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
746                                                                         }
747                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
748                                                         }
749                                                         
750                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
751                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
752                                                         else { groupCounts[it->first] += (1 + numRedundants); }
753                                                                 
754                                                 }
755                                         }
756                                         
757                                         if(allFiles){
758                                                 ofstream output;
759                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
760                                                 currSeq.printSequence(output);
761                                                 output.close();
762                                                 
763                                                 if(qFileName != ""){
764                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
765                                                         currQual.printQScores(output);
766                                                         output.close();                                                 
767                                                 }
768                                                 
769                                                 if(nameFile != ""){
770                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
771                                                         if (itName != nameMap.end()) { 
772                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
773                                                                 output << itName->first << '\t' << itName->second << endl; 
774                                                                 output.close();
775                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
776                                                 }
777                                         }
778                                 }
779                                 else{
780                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
781                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
782                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
783                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
784                                         }
785                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
786                                         currSeq.setUnaligned(origSeq);
787                                         currSeq.setAligned(origSeq);
788                                         currSeq.printSequence(scrapFASTAFile);
789                                         if(qFileName != ""){
790                                                 currQual.printQScores(scrapQualFile);
791                                         }
792                                 }
793                                 count++;
794                         }
795                         
796                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
797                                 unsigned long long pos = inFASTA.tellg();
798                                 if ((pos == -1) || (pos >= line.end)) { break; }
799                         
800                         #else
801                                 if (inFASTA.eof()) { break; }
802                         #endif
803                         
804                         //report progress
805                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
806                         
807                 }
808                 //report progress
809                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
810                 
811                 
812                 inFASTA.close();
813                 trimFASTAFile.close();
814                 scrapFASTAFile.close();
815                 if (createGroup) {       outGroupsFile.close();   }
816                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
817                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
818                 
819                 return count;
820         }
821         catch(exception& e) {
822                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
823                 exit(1);
824         }
825 }
826
827 /**************************************************************************************************/
828
829 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
830         try {
831         
832         int process = 1;
833                 int exitCommand = 1;
834                 processIDS.clear();
835                 
836 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
837                                 //loop through and create all the processes you want
838                 while (process != processors) {
839                         int pid = fork();
840                         
841                         if (pid > 0) {
842                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
843                                 process++;
844                         }else if (pid == 0){
845                                 
846                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
847                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
848                                 vector<vector<string> > tempNameFileNames = nameFileNames;
849
850                                 if(allFiles){
851                                         ofstream temp;
852
853                                         for(int i=0;i<tempFASTAFileNames.size();i++){
854                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
855                                                         if (tempFASTAFileNames[i][j] != "") {
856                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
857                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
858
859                                                                 if(qFileName != ""){
860                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
861                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
862                                                                 }
863                                                                 if(nameFile != ""){
864                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
865                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
866                                                                 }
867                                                         }
868                                                 }
869                                         }
870                                 }
871                                                         
872                                 driverCreateTrim(filename,
873                                                                  qFileName,
874                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
875                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
876                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
877                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
878                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
879                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
880                                                                  (groupFile + toString(getpid()) + ".temp"),
881                                                                  tempFASTAFileNames,
882                                                                  tempPrimerQualFileNames,
883                                                                  tempNameFileNames,
884                                                                  lines[process],
885                                                                  qLines[process]);
886                 
887                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
888                                 
889                                 //pass groupCounts to parent
890                                 if(createGroup){
891                                         ofstream out;
892                                         string tempFile = filename + toString(getpid()) + ".num.temp";
893                                         m->openOutputFile(tempFile, out);
894                                         
895                                         out << groupCounts.size() << endl;
896                                         
897                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
898                                                 out << it->first << '\t' << it->second << endl;
899                                         }
900                                         out.close();
901                                 }
902                                 exit(0);
903                         }else { 
904                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
905                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
906                                 exit(0);
907                         }
908                 }
909                 
910                 //parent do my part
911                 ofstream temp;
912                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
913                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
914                 if(qFileName != ""){
915                         m->openOutputFile(trimQualFileName, temp);              temp.close();
916                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
917                 }
918                 if (nameFile != "") {
919                         m->openOutputFile(trimNameFileName, temp);              temp.close();
920                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
921                 }
922
923                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
924                 
925                 //force parent to wait until all the processes are done
926                 for (int i=0;i<processIDS.size();i++) { 
927                         int temp = processIDS[i];
928                         wait(&temp);
929                 }
930 #else
931         //////////////////////////////////////////////////////////////////////////////////////////////////////
932                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
933                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
934                 //////////////////////////////////////////////////////////////////////////////////////////////////////
935                 
936                 vector<trimData*> pDataArray; 
937                 DWORD   dwThreadIdArray[processors-1];
938                 HANDLE  hThreadArray[processors-1]; 
939                 
940                 //Create processor worker threads.
941                 for( int i=0; i<processors-1; i++){
942                         
943             string extension = "";
944                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
945             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
946             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
947             vector<vector<string> > tempNameFileNames = nameFileNames;
948             
949             if(allFiles){
950                 ofstream temp;
951                 
952                 for(int i=0;i<tempFASTAFileNames.size();i++){
953                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
954                         if (tempFASTAFileNames[i][j] != "") {
955                             tempFASTAFileNames[i][j] += extension;
956                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
957                             
958                             if(qFileName != ""){
959                                 tempPrimerQualFileNames[i][j] += extension;
960                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
961                             }
962                             if(nameFile != ""){
963                                 tempNameFileNames[i][j] += extension;
964                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
965                             }
966                         }
967                     }
968                 }
969             }
970
971             
972                         trimData* tempTrim = new trimData(filename,
973                                               qFileName, nameFile,
974                                               (trimFASTAFileName+extension),
975                                               (scrapFASTAFileName+extension),
976                                               (trimQualFileName+extension),
977                                               (scrapQualFileName+extension),
978                                               (trimNameFileName+extension),
979                                               (scrapNameFileName+extension),
980                                               (groupFile+extension),
981                                               tempFASTAFileNames,
982                                               tempPrimerQualFileNames,
983                                               tempNameFileNames,
984                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
985                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
986                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
987                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
988                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
989                         pDataArray.push_back(tempTrim);
990             
991                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
992                 }
993         
994         //parent do my part
995                 ofstream temp;
996                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
997                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
998                 if(qFileName != ""){
999                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1000                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1001                 }
1002                 if (nameFile != "") {
1003                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1004                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1005                 }
1006         
1007                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1008         processIDS.push_back(processors-1);
1009
1010         
1011                 //Wait until all threads have terminated.
1012                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1013                 
1014                 //Close all thread handles and free memory allocations.
1015                 for(int i=0; i < pDataArray.size(); i++){
1016                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1017                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1018                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1019                 else { groupCounts[it->first] += it->second; }
1020             }
1021             CloseHandle(hThreadArray[i]);
1022                         delete pDataArray[i];
1023                 }
1024         
1025 #endif          
1026         
1027         
1028         //append files
1029                 for(int i=0;i<processIDS.size();i++){
1030                         
1031                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1032                         
1033                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1034                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1035                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1036                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1037                         
1038                         if(qFileName != ""){
1039                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1040                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1041                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1042                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1043                         }
1044                         
1045                         if(nameFile != ""){
1046                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1047                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1048                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1049                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1050                         }
1051                         
1052                         if(createGroup){
1053                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1054                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1055                         }
1056                         
1057                         
1058                         if(allFiles){
1059                                 for(int j=0;j<fastaFileNames.size();j++){
1060                                         for(int k=0;k<fastaFileNames[j].size();k++){
1061                                                 if (fastaFileNames[j][k] != "") {
1062                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1063                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1064                                                         
1065                                                         if(qFileName != ""){
1066                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1067                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1068                                                         }
1069                                                         
1070                                                         if(nameFile != ""){
1071                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1072                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1073                                                         }
1074                                                 }
1075                                         }
1076                                 }
1077                         }
1078                         
1079             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1080                         if(createGroup){
1081                                 ifstream in;
1082                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1083                                 m->openInputFile(tempFile, in);
1084                                 int tempNum;
1085                                 string group;
1086                                 
1087                                 in >> tempNum; m->gobble(in);
1088                                 
1089                                 if (tempNum != 0) {
1090                                         while (!in.eof()) { 
1091                                                 in >> group >> tempNum; m->gobble(in);
1092                         
1093                                                 map<string, int>::iterator it = groupCounts.find(group);
1094                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1095                                                 else { groupCounts[it->first] += tempNum; }
1096                                         }
1097                                 }
1098                                 in.close(); m->mothurRemove(tempFile);
1099                         }
1100             #endif
1101                 }
1102
1103         return exitCommand;
1104         }
1105         catch(exception& e) {
1106                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1107                 exit(1);
1108         }
1109 }
1110
1111 /**************************************************************************************************/
1112
1113 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1114         try {
1115         
1116         vector<unsigned long long> fastaFilePos;
1117                 vector<unsigned long long> qfileFilePos;
1118                 
1119                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1120                 //set file positions for fasta file
1121                 fastaFilePos = m->divideFile(filename, processors);
1122                 
1123                 //get name of first sequence in each chunk
1124                 map<string, int> firstSeqNames;
1125                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1126                         ifstream in;
1127                         m->openInputFile(filename, in);
1128                         in.seekg(fastaFilePos[i]);
1129                 
1130                         Sequence temp(in); 
1131                         firstSeqNames[temp.getName()] = i;
1132                 
1133                         in.close();
1134                 }
1135                 
1136                 if(qfilename != "")     {
1137             //seach for filePos of each first name in the qfile and save in qfileFilePos
1138             ifstream inQual;
1139             m->openInputFile(qfilename, inQual);
1140             
1141             string input;
1142             while(!inQual.eof()){       
1143                 input = m->getline(inQual);
1144                 
1145                 if (input.length() != 0) {
1146                     if(input[0] == '>'){ //this is a sequence name line
1147                         istringstream nameStream(input);
1148                         
1149                         string sname = "";  nameStream >> sname;
1150                         sname = sname.substr(1);
1151                         
1152                         map<string, int>::iterator it = firstSeqNames.find(sname);
1153                         
1154                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1155                             unsigned long long pos = inQual.tellg(); 
1156                             qfileFilePos.push_back(pos - input.length() - 1);   
1157                             firstSeqNames.erase(it);
1158                         }
1159                     }
1160                 }
1161                 
1162                 if (firstSeqNames.size() == 0) { break; }
1163             }
1164             inQual.close();
1165             
1166             
1167             if (firstSeqNames.size() != 0) { 
1168                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1169                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1170                 }
1171                 qFileName = "";
1172                 return processors;
1173             }
1174             
1175             //get last file position of qfile
1176             FILE * pFile;
1177             unsigned long long size;
1178             
1179             //get num bytes in file
1180             pFile = fopen (qfilename.c_str(),"rb");
1181             if (pFile==NULL) perror ("Error opening file");
1182             else{
1183                 fseek (pFile, 0, SEEK_END);
1184                 size=ftell (pFile);
1185                 fclose (pFile);
1186             }
1187             
1188             qfileFilePos.push_back(size);
1189         }
1190         
1191         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1192             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1193                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1194                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1195                 }       
1196                 if(qfilename == "")     {       qLines = lines; } //files with duds
1197                 
1198                 return processors;
1199                 
1200                 #else
1201             
1202         if (processors == 1) { //save time
1203                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1204                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1205             lines.push_back(linePair(0, 1000));
1206             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1207         }else{
1208             int numFastaSeqs = 0;
1209             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1210             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1211         
1212             if (qfilename != "") { 
1213                 int numQualSeqs = 0;
1214                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1215                 
1216                 if (numFastaSeqs != numQualSeqs) {
1217                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1218                 }
1219             }
1220         
1221             //figure out how many sequences you have to process
1222             int numSeqsPerProcessor = numFastaSeqs / processors;
1223             for (int i = 0; i < processors; i++) {
1224                 int startIndex =  i * numSeqsPerProcessor;
1225                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1226                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1227                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1228             }
1229         }
1230             if(qfilename == "") {       qLines = lines; } //files with duds
1231                         return 1;
1232                 
1233                 #endif
1234         }
1235         catch(exception& e) {
1236                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1237                 exit(1);
1238         }
1239 }
1240
1241 //***************************************************************************************************************
1242
1243 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1244         try {
1245                 ifstream inOligos;
1246                 m->openInputFile(oligoFile, inOligos);
1247                 
1248                 ofstream test;
1249                 
1250                 string type, oligo, group;
1251
1252                 int indexPrimer = 0;
1253                 int indexBarcode = 0;
1254                 
1255                 while(!inOligos.eof()){
1256
1257                         inOligos >> type; 
1258                                         
1259                         if(type[0] == '#'){
1260                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1261                                 m->gobble(inOligos);
1262                         }
1263                         else{
1264                                 m->gobble(inOligos);
1265                                 //make type case insensitive
1266                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1267                                 
1268                                 inOligos >> oligo;
1269                                 
1270                                 for(int i=0;i<oligo.length();i++){
1271                                         oligo[i] = toupper(oligo[i]);
1272                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1273                                 }
1274                                 
1275                                 if(type == "FORWARD"){
1276                                         group = "";
1277                                         
1278                                         // get rest of line in case there is a primer name
1279                                         while (!inOligos.eof()) {       
1280                                                 char c = inOligos.get(); 
1281                                                 if (c == 10 || c == 13){        break;  }
1282                                                 else if (c == 32 || c == 9){;} //space or tab
1283                                                 else {  group += c;  }
1284                                         } 
1285                                         
1286                                         //check for repeat barcodes
1287                                         map<string, int>::iterator itPrime = primers.find(oligo);
1288                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1289                                         
1290                                         primers[oligo]=indexPrimer; indexPrimer++;              
1291                                         primerNameVector.push_back(group);
1292                                 }
1293                                 else if(type == "REVERSE"){
1294                                         //Sequence oligoRC("reverse", oligo);
1295                                         //oligoRC.reverseComplement();
1296                     string oligoRC = reverseOligo(oligo);
1297                                         revPrimer.push_back(oligoRC);
1298                                 }
1299                                 else if(type == "BARCODE"){
1300                                         inOligos >> group;
1301                     
1302                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1303                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1304                                         string temp = "";
1305                     while (!inOligos.eof())     {       
1306                                                 char c = inOligos.get(); 
1307                                                 if (c == 10 || c == 13){        break;  }
1308                                                 else if (c == 32 || c == 9){;} //space or tab
1309                                                 else {  temp += c;  }
1310                                         } 
1311                                         
1312                     //then this is illumina data with 4 columns
1313                     if (temp != "") {  
1314                         string reverseBarcode = reverseOligo(group); //reverse barcode
1315                         group = temp;
1316                         
1317                         //check for repeat barcodes
1318                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1319                         if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
1320                                                 
1321                         rbarcodes[reverseBarcode]=indexBarcode; 
1322                     }
1323                         
1324                                         //check for repeat barcodes
1325                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1326                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1327                                                 
1328                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1329                                         barcodeNameVector.push_back(group);
1330                                 }else if(type == "LINKER"){
1331                                         linker.push_back(oligo);
1332                                 }else if(type == "SPACER"){
1333                                         spacer.push_back(oligo);
1334                                 }
1335                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1336                         }
1337                         m->gobble(inOligos);
1338                 }       
1339                 inOligos.close();
1340                 
1341                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1342                 
1343                 //add in potential combos
1344                 if(barcodeNameVector.size() == 0){
1345                         barcodes[""] = 0;
1346                         barcodeNameVector.push_back("");                        
1347                 }
1348                 
1349                 if(primerNameVector.size() == 0){
1350                         primers[""] = 0;
1351                         primerNameVector.push_back("");                 
1352                 }
1353                 
1354                 fastaFileNames.resize(barcodeNameVector.size());
1355                 for(int i=0;i<fastaFileNames.size();i++){
1356                         fastaFileNames[i].assign(primerNameVector.size(), "");
1357                 }
1358                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1359                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1360                 
1361                 if(allFiles){
1362                         set<string> uniqueNames; //used to cleanup outputFileNames
1363                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1364                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1365                                         
1366                                         string primerName = primerNameVector[itPrimer->second];
1367                                         string barcodeName = barcodeNameVector[itBar->second];
1368                                         
1369                                         string comboGroupName = "";
1370                                         string fastaFileName = "";
1371                                         string qualFileName = "";
1372                                         string nameFileName = "";
1373                                         
1374                                         if(primerName == ""){
1375                                                 comboGroupName = barcodeNameVector[itBar->second];
1376                                         }
1377                                         else{
1378                                                 if(barcodeName == ""){
1379                                                         comboGroupName = primerNameVector[itPrimer->second];
1380                                                 }
1381                                                 else{
1382                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1383                                                 }
1384                                         }
1385                                         
1386                                         
1387                                         ofstream temp;
1388                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1389                                         if (uniqueNames.count(fastaFileName) == 0) {
1390                                                 outputNames.push_back(fastaFileName);
1391                                                 outputTypes["fasta"].push_back(fastaFileName);
1392                                                 uniqueNames.insert(fastaFileName);
1393                                         }
1394                                         
1395                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1396                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1397                                         
1398                                         if(qFileName != ""){
1399                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1400                                                 if (uniqueNames.count(qualFileName) == 0) {
1401                                                         outputNames.push_back(qualFileName);
1402                                                         outputTypes["qfile"].push_back(qualFileName);
1403                                                 }
1404                                                 
1405                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1406                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1407                                         }
1408                                         
1409                                         if(nameFile != ""){
1410                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1411                                                 if (uniqueNames.count(nameFileName) == 0) {
1412                                                         outputNames.push_back(nameFileName);
1413                                                         outputTypes["name"].push_back(nameFileName);
1414                                                 }
1415                                                 
1416                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1417                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1418                                         }
1419                                         
1420                                 }
1421                         }
1422                 }
1423                 numFPrimers = primers.size();
1424                 numRPrimers = revPrimer.size();
1425         numLinkers = linker.size();
1426         numSpacers = spacer.size();
1427                 
1428                 bool allBlank = true;
1429                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1430                         if (barcodeNameVector[i] != "") {
1431                                 allBlank = false;
1432                                 break;
1433                         }
1434                 }
1435                 for (int i = 0; i < primerNameVector.size(); i++) {
1436                         if (primerNameVector[i] != "") {
1437                                 allBlank = false;
1438                                 break;
1439                         }
1440                 }
1441                 
1442                 if (allBlank) {
1443                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1444                         allFiles = false;
1445                         return false;
1446                 }
1447                 
1448                 return true;
1449                 
1450         }
1451         catch(exception& e) {
1452                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1453                 exit(1);
1454         }
1455 }
1456 //***************************************************************************************************************
1457
1458 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1459         try {
1460                 bool success = 1;
1461                 if(qscores.getName() != ""){
1462                         qscores.trimQScores(-1, keepFirst);
1463                 }
1464                 sequence.trim(keepFirst);
1465                 return success;
1466         }
1467         catch(exception& e) {
1468                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1469                 exit(1);
1470         }
1471         
1472 }       
1473
1474 //***************************************************************************************************************
1475
1476 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1477         try {
1478                 bool success = 0;
1479                 
1480                 int length = sequence.getNumBases() - removeLast;
1481                 
1482                 if(length > 0){
1483                         if(qscores.getName() != ""){
1484                                 qscores.trimQScores(-1, length);
1485                         }
1486                         sequence.trim(length);
1487                         success = 1;
1488                 }
1489                 else{
1490                         success = 0;
1491                 }
1492
1493                 return success;
1494         }
1495         catch(exception& e) {
1496                 m->errorOut(e, "removeLastTrim", "countDiffs");
1497                 exit(1);
1498         }
1499         
1500 }       
1501
1502 //***************************************************************************************************************
1503
1504 bool TrimSeqsCommand::cullLength(Sequence& seq){
1505         try {
1506         
1507                 int length = seq.getNumBases();
1508                 bool success = 0;       //guilty until proven innocent
1509                 
1510                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1511                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1512                 else                                                                                            {       success = 0;    }
1513                 
1514                 return success;
1515         
1516         }
1517         catch(exception& e) {
1518                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1519                 exit(1);
1520         }
1521         
1522 }
1523
1524 //***************************************************************************************************************
1525
1526 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1527         try {
1528                 int longHomoP = seq.getLongHomoPolymer();
1529                 bool success = 0;       //guilty until proven innocent
1530                 
1531                 if(longHomoP <= maxHomoP){      success = 1;    }
1532                 else                                    {       success = 0;    }
1533                 
1534                 return success;
1535         }
1536         catch(exception& e) {
1537                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1538                 exit(1);
1539         }
1540         
1541 }
1542 //********************************************************************/
1543 string TrimSeqsCommand::reverseOligo(string oligo){
1544         try {
1545         string reverse = "";
1546         
1547         for(int i=oligo.length()-1;i>=0;i--){
1548             
1549             if(oligo[i] == 'A')         {       reverse += 'T'; }
1550             else if(oligo[i] == 'T'){   reverse += 'A'; }
1551             else if(oligo[i] == 'U'){   reverse += 'A'; }
1552             
1553             else if(oligo[i] == 'G'){   reverse += 'C'; }
1554             else if(oligo[i] == 'C'){   reverse += 'G'; }
1555             
1556             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1557             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1558             
1559             else if(oligo[i] == 'M'){   reverse += 'K'; }
1560             else if(oligo[i] == 'K'){   reverse += 'M'; }
1561             
1562             else if(oligo[i] == 'W'){   reverse += 'W'; }
1563             else if(oligo[i] == 'S'){   reverse += 'S'; }
1564             
1565             else if(oligo[i] == 'B'){   reverse += 'V'; }
1566             else if(oligo[i] == 'V'){   reverse += 'B'; }
1567             
1568             else if(oligo[i] == 'D'){   reverse += 'H'; }
1569             else if(oligo[i] == 'H'){   reverse += 'D'; }
1570             
1571             else                                                {       reverse += 'N'; }
1572         }
1573         
1574         
1575         return reverse;
1576     }
1577         catch(exception& e) {
1578                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1579                 exit(1);
1580         }
1581 }
1582
1583 //***************************************************************************************************************
1584
1585 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1586         try {
1587                 int numNs = seq.getAmbigBases();
1588                 bool success = 0;       //guilty until proven innocent
1589                 
1590                 if(numNs <= maxAmbig)   {       success = 1;    }
1591                 else                                    {       success = 0;    }
1592                 
1593                 return success;
1594         }
1595         catch(exception& e) {
1596                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1597                 exit(1);
1598         }
1599         
1600 }
1601 //***************************************************************************************************************