]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
major change to the tree class to use the count table class instead of tree map....
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "qfile")            {   outputFileName =  "qual";   }
111             else if (type == "fasta")            {   outputFileName =  "fasta";   }
112             else if (type == "group")            {   outputFileName =  "groups";   }
113             else if (type == "name")            {   outputFileName =  "names";   }
114             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
115         }
116         return outputFileName;
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
120                 exit(1);
121         }
122 }
123
124
125 //**********************************************************************************************************************
126
127 TrimSeqsCommand::TrimSeqsCommand(){     
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133                 outputTypes["qfile"] = tempOutNames;
134                 outputTypes["group"] = tempOutNames;
135                 outputTypes["name"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174                         
175                         //if the user changes the input directory command factory will send this info to us in the output parameter 
176                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
177                         if (inputDir == "not found"){   inputDir = "";          }
178                         else {
179                                 string path;
180                                 it = parameters.find("fasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("oligos");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
194                                 }
195                                 
196                                 it = parameters.find("qfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
202                                 }
203                                 
204                                 it = parameters.find("name");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
210                                 }
211                                 
212                         }
213
214                         
215                         //check for required parameters
216                         fastaFile = validParameter.validFile(parameters, "fasta", true);
217                         if (fastaFile == "not found") {                                 
218                                 fastaFile = m->getFastaFile(); 
219                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
220                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
221                         }else if (fastaFile == "not open") { abort = true; }    
222                         else { m->setFastaFile(fastaFile); }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
226                                 outputDir = ""; 
227                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
228                         }
229                 
230                         
231                         //check for optional parameter and set defaults
232                         // ...at some point should added some additional type checking...
233                         string temp;
234                         temp = validParameter.validFile(parameters, "flip", false);
235                         if (temp == "not found")    {   flip = 0;       }
236                         else {  flip = m->isTrue(temp);         }
237                 
238                         temp = validParameter.validFile(parameters, "oligos", true);
239                         if (temp == "not found"){       oligoFile = "";         }
240                         else if(temp == "not open"){    abort = true;   } 
241                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
242                         
243                         
244                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
245                         m->mothurConvert(temp, maxAmbig);  
246
247                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
248                         m->mothurConvert(temp, maxHomoP);  
249
250                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, minLength); 
252                         
253                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
254                         m->mothurConvert(temp, maxLength);
255                         
256                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, bdiffs);
258                         
259                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, pdiffs);
261             
262             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, ldiffs);
264             
265             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, sdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
269                         m->mothurConvert(temp, tdiffs);
270                         
271                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
272                         
273                         temp = validParameter.validFile(parameters, "qfile", true);     
274                         if (temp == "not found")        {       qFileName = "";         }
275                         else if(temp == "not open")     {       abort = true;           }
276                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
277                         
278                         temp = validParameter.validFile(parameters, "name", true);      
279                         if (temp == "not found")        {       nameFile = "";          }
280                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
281                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
282                         
283                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
284                         m->mothurConvert(temp, qThreshold);
285                         
286                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
287                         qtrim = m->isTrue(temp);
288
289                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
290                         convert(temp, qRollAverage);
291
292                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
293                         convert(temp, qWindowAverage);
294
295                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
296                         convert(temp, qWindowSize);
297
298                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
299                         convert(temp, qWindowStep);
300
301                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
302                         convert(temp, qAverage);
303
304                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
305                         convert(temp, keepFirst);
306
307                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
308                         convert(temp, removeLast);
309                         
310                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
311                         allFiles = m->isTrue(temp);
312             
313             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
314                         keepforward = m->isTrue(temp);
315                         
316                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
317                         m->setProcessors(temp);
318                         m->mothurConvert(temp, processors); 
319                         
320                         
321                         if(allFiles && (oligoFile == "")){
322                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
323                         }
324                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
325                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
326                                 qAverage=0;
327                                 qThreshold=0;
328                         }
329                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
330                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
331                                 abort = true;
332                         }
333                         
334                         if (nameFile == "") {
335                                 vector<string> files; files.push_back(fastaFile);
336                                 parser.getNameFile(files);
337                         }
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
343                 exit(1);
344         }
345 }
346 //***************************************************************************************************************
347
348 int TrimSeqsCommand::execute(){
349         try{
350         
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 numFPrimers = 0;  //this needs to be initialized
354                 numRPrimers = 0;
355         numSpacers = 0;
356         numLinkers = 0;
357                 createGroup = false;
358                 vector<vector<string> > fastaFileNames;
359                 vector<vector<string> > qualFileNames;
360                 vector<vector<string> > nameFileNames;
361                 
362                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
363                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
364                 
365                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
366                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
367                 
368                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
369                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
370                 
371                 if (qFileName != "") {
372                         outputNames.push_back(trimQualFile);
373                         outputNames.push_back(scrapQualFile);
374                         outputTypes["qfile"].push_back(trimQualFile);
375                         outputTypes["qfile"].push_back(scrapQualFile); 
376                 }
377                 
378                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
379                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
380                 
381                 if (nameFile != "") {
382                         m->readNames(nameFile, nameMap);
383                         outputNames.push_back(trimNameFile);
384                         outputNames.push_back(scrapNameFile);
385                         outputTypes["name"].push_back(trimNameFile);
386                         outputTypes["name"].push_back(scrapNameFile); 
387                 }
388                 
389                 if (m->control_pressed) { return 0; }
390                 
391                 string outputGroupFileName;
392                 if(oligoFile != ""){
393                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
394                         if (createGroup) {
395                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
396                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
397                         }
398                 }
399        
400         //fills lines and qlines
401                 setLines(fastaFile, qFileName);
402                 
403         if(processors == 1){
404             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
405         }else{
406             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
407         }       
408                 
409                 
410                 if (m->control_pressed) {  return 0; }                  
411         
412                 if(allFiles){
413                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
414                         map<string, string>::iterator it;
415                         set<string> namesToRemove;
416                         for(int i=0;i<fastaFileNames.size();i++){
417                                 for(int j=0;j<fastaFileNames[0].size();j++){
418                                         if (fastaFileNames[i][j] != "") {
419                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
420                                                         if(m->isBlank(fastaFileNames[i][j])){
421                                                                 m->mothurRemove(fastaFileNames[i][j]);
422                                                                 namesToRemove.insert(fastaFileNames[i][j]);
423                                                         
424                                                                 if(qFileName != ""){
425                                                                         m->mothurRemove(qualFileNames[i][j]);
426                                                                         namesToRemove.insert(qualFileNames[i][j]);
427                                                                 }
428                                                                 
429                                                                 if(nameFile != ""){
430                                                                         m->mothurRemove(nameFileNames[i][j]);
431                                                                         namesToRemove.insert(nameFileNames[i][j]);
432                                                                 }
433                                                         }else{  
434                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
435                                                                 if (it == uniqueFastaNames.end()) {     
436                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
437                                                                 }       
438                                                         }
439                                                 }
440                                         }
441                                 }
442                         }
443                         
444                         //remove names for outputFileNames, just cleans up the output
445                         vector<string> outputNames2;
446                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
447                         outputNames = outputNames2;
448                         
449                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
450                                 ifstream in;
451                                 m->openInputFile(it->first, in);
452                                 
453                                 ofstream out;
454                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
455                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
456                                 m->openOutputFile(thisGroupName, out);
457                                 
458                                 while (!in.eof()){
459                                         if (m->control_pressed) { break; }
460                                         
461                                         Sequence currSeq(in); m->gobble(in);
462                                         out << currSeq.getName() << '\t' << it->second << endl;
463                     
464                     if (nameFile != "") {
465                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
466                         if (itName != nameMap.end()) { 
467                             vector<string> thisSeqsNames; 
468                             m->splitAtChar(itName->second, thisSeqsNames, ',');
469                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
470                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
471                             }
472                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
473                     }
474                                 }
475                                 in.close();
476                                 out.close();
477                         }
478                 }
479                 
480                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
481
482                 //output group counts
483                 m->mothurOutEndLine();
484                 int total = 0;
485                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
486                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
487                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
488                 }
489                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
490                 
491                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
492
493                 //set fasta file as new current fastafile
494                 string current = "";
495                 itTypes = outputTypes.find("fasta");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
498                 }
499                 
500                 itTypes = outputTypes.find("name");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
503                 }
504                 
505                 itTypes = outputTypes.find("qfile");
506                 if (itTypes != outputTypes.end()) {
507                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
508                 }
509                 
510                 itTypes = outputTypes.find("group");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
513                 }
514
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
518                 m->mothurOutEndLine();
519                 
520                 return 0;       
521                         
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "TrimSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528                 
529 /**************************************************************************************/
530
531 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
532                 
533         try {
534                 
535                 ofstream trimFASTAFile;
536                 m->openOutputFile(trimFileName, trimFASTAFile);
537                 
538                 ofstream scrapFASTAFile;
539                 m->openOutputFile(scrapFileName, scrapFASTAFile);
540                 
541                 ofstream trimQualFile;
542                 ofstream scrapQualFile;
543                 if(qFileName != ""){
544                         m->openOutputFile(trimQFileName, trimQualFile);
545                         m->openOutputFile(scrapQFileName, scrapQualFile);
546                 }
547                 
548                 ofstream trimNameFile;
549                 ofstream scrapNameFile;
550                 if(nameFile != ""){
551                         m->openOutputFile(trimNFileName, trimNameFile);
552                         m->openOutputFile(scrapNFileName, scrapNameFile);
553                 }
554                 
555                 
556                 ofstream outGroupsFile;
557                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
558                 if(allFiles){
559                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
560                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
561                                         if (fastaFileNames[i][j] != "") {
562                                                 ofstream temp;
563                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
564                                                 if(qFileName != ""){
565                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
566                                                 }
567                                                 
568                                                 if(nameFile != ""){
569                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
570                                                 }
571                                         }
572                                 }
573                         }
574                 }
575                 
576                 ifstream inFASTA;
577                 m->openInputFile(filename, inFASTA);
578                 inFASTA.seekg(line.start);
579                 
580                 ifstream qFile;
581                 if(qFileName != "")     {
582                         m->openInputFile(qFileName, qFile);
583                         qFile.seekg(qline.start);  
584                 }
585                 
586                 int count = 0;
587                 bool moreSeqs = 1;
588                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
589         
590                 while (moreSeqs) {
591                                 
592                         if (m->control_pressed) { 
593                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
594                                 if (createGroup) {       outGroupsFile.close();   }
595
596                                 if(qFileName != ""){
597                                         qFile.close();
598                                 }
599                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
600
601                                 return 0;
602                         }
603                         
604                         int success = 1;
605                         string trashCode = "";
606                         int currentSeqsDiffs = 0;
607
608                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
609                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
610             
611                         QualityScores currQual;
612                         if(qFileName != ""){
613                                 currQual = QualityScores(qFile);  m->gobble(qFile);
614                 //cout << currQual.getName() << endl;
615                         }
616                         
617                         string origSeq = currSeq.getUnaligned();
618                         if (origSeq != "") {
619                                 
620                                 int barcodeIndex = 0;
621                                 int primerIndex = 0;
622                                 
623                 if(numLinkers != 0){
624                                         success = trimOligos.stripLinker(currSeq, currQual);
625                                         if(success > ldiffs)            {       trashCode += 'k';       }
626                                         else{ currentSeqsDiffs += success;  }
627
628                                 }
629                 
630                                 if(barcodes.size() != 0){
631                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
632                                         if(success > bdiffs)            {       trashCode += 'b';       }
633                                         else{ currentSeqsDiffs += success;  }
634                                 }
635                 
636                 if(rbarcodes.size() != 0){
637                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
638                                         if(success > bdiffs)            {       trashCode += 'b';       }
639                                         else{ currentSeqsDiffs += success;  }
640                                 }
641                                 
642                 if(numSpacers != 0){
643                                         success = trimOligos.stripSpacer(currSeq, currQual);
644                                         if(success > sdiffs)            {       trashCode += 's';       }
645                                         else{ currentSeqsDiffs += success;  }
646
647                                 }
648                 
649                                 if(numFPrimers != 0){
650                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
651                                         if(success > pdiffs)            {       trashCode += 'f';       }
652                                         else{ currentSeqsDiffs += success;  }
653                                 }
654                                 
655                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
656                                 
657                                 if(numRPrimers != 0){
658                                         success = trimOligos.stripReverse(currSeq, currQual);
659                                         if(!success)                            {       trashCode += 'r';       }
660                                 }
661
662                                 if(keepFirst != 0){
663                                         success = keepFirstTrim(currSeq, currQual);
664                                 }
665                                 
666                                 if(removeLast != 0){
667                                         success = removeLastTrim(currSeq, currQual);
668                                         if(!success)                            {       trashCode += 'l';       }
669                                 }
670
671                                 
672                                 if(qFileName != ""){
673                                         int origLength = currSeq.getNumBases();
674                                         
675                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
676                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
677                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
678                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
679                                         else                                            {       success = 1;                            }
680                                         
681                                         //you don't want to trim, if it fails above then scrap it
682                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
683                                         
684                                         if(!success)                            {       trashCode += 'q';       }
685                                 }                               
686                 
687                                 if(minLength > 0 || maxLength > 0){
688                                         success = cullLength(currSeq);
689                                         if(!success)                            {       trashCode += 'l';       }
690                                 }
691                                 if(maxHomoP > 0){
692                                         success = cullHomoP(currSeq);
693                                         if(!success)                            {       trashCode += 'h';       }
694                                 }
695                                 if(maxAmbig != -1){
696                                         success = cullAmbigs(currSeq);
697                                         if(!success)                            {       trashCode += 'n';       }
698                                 }
699                                 
700                                 if(flip){               // should go last                       
701                                         currSeq.reverseComplement();
702                                         if(qFileName != ""){
703                                                 currQual.flipQScores(); 
704                                         }
705                                 }
706                                 
707                                 if(trashCode.length() == 0){
708                                         currSeq.setAligned(currSeq.getUnaligned());
709                                         currSeq.printSequence(trimFASTAFile);
710                                         
711                                         if(qFileName != ""){
712                                                 currQual.printQScores(trimQualFile);
713                                         }
714                                         
715                     
716                                         if(nameFile != ""){
717                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
718                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
719                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
720                                         }
721                                         
722                                         if (createGroup) {
723                                                 if(barcodes.size() != 0){
724                                                         string thisGroup = barcodeNameVector[barcodeIndex];
725                                                         if (primers.size() != 0) { 
726                                                                 if (primerNameVector[primerIndex] != "") { 
727                                                                         if(thisGroup != "") {
728                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
729                                                                         }else {
730                                                                                 thisGroup = primerNameVector[primerIndex]; 
731                                                                         }
732                                                                 } 
733                                                         }
734                                                         
735                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
736                                                         
737                             int numRedundants = 0;
738                                                         if (nameFile != "") {
739                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
740                                                                 if (itName != nameMap.end()) { 
741                                                                         vector<string> thisSeqsNames; 
742                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
743                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
744                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
745                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
746                                                                         }
747                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
748                                                         }
749                                                         
750                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
751                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
752                                                         else { groupCounts[it->first] += (1 + numRedundants); }
753                                                                 
754                                                 }
755                                         }
756                                         
757                                         if(allFiles){
758                                                 ofstream output;
759                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
760                                                 currSeq.printSequence(output);
761                                                 output.close();
762                                                 
763                                                 if(qFileName != ""){
764                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
765                                                         currQual.printQScores(output);
766                                                         output.close();                                                 
767                                                 }
768                                                 
769                                                 if(nameFile != ""){
770                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
771                                                         if (itName != nameMap.end()) { 
772                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
773                                                                 output << itName->first << '\t' << itName->second << endl; 
774                                                                 output.close();
775                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
776                                                 }
777                                         }
778                                 }
779                                 else{
780                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
781                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
782                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
783                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
784                                         }
785                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
786                                         currSeq.setUnaligned(origSeq);
787                                         currSeq.setAligned(origSeq);
788                                         currSeq.printSequence(scrapFASTAFile);
789                                         if(qFileName != ""){
790                                                 currQual.printQScores(scrapQualFile);
791                                         }
792                                 }
793                                 count++;
794                         }
795                         
796                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
797                                 unsigned long long pos = inFASTA.tellg();
798                                 if ((pos == -1) || (pos >= line.end)) { break; }
799                         
800                         #else
801                                 if (inFASTA.eof()) { break; }
802                         #endif
803                         
804                         //report progress
805                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
806                         
807                 }
808                 //report progress
809                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
810                 
811                 
812                 inFASTA.close();
813                 trimFASTAFile.close();
814                 scrapFASTAFile.close();
815                 if (createGroup) {       outGroupsFile.close();   }
816                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
817                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
818                 
819                 return count;
820         }
821         catch(exception& e) {
822                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
823                 exit(1);
824         }
825 }
826
827 /**************************************************************************************************/
828
829 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
830         try {
831         
832         int process = 1;
833                 int exitCommand = 1;
834                 processIDS.clear();
835                 
836 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
837                                 //loop through and create all the processes you want
838                 while (process != processors) {
839                         int pid = fork();
840                         
841                         if (pid > 0) {
842                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
843                                 process++;
844                         }else if (pid == 0){
845                                 
846                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
847                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
848                                 vector<vector<string> > tempNameFileNames = nameFileNames;
849
850                                 if(allFiles){
851                                         ofstream temp;
852
853                                         for(int i=0;i<tempFASTAFileNames.size();i++){
854                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
855                                                         if (tempFASTAFileNames[i][j] != "") {
856                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
857                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
858
859                                                                 if(qFileName != ""){
860                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
861                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
862                                                                 }
863                                                                 if(nameFile != ""){
864                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
865                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
866                                                                 }
867                                                         }
868                                                 }
869                                         }
870                                 }
871                                                         
872                                 driverCreateTrim(filename,
873                                                                  qFileName,
874                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
875                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
876                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
877                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
878                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
879                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
880                                                                  (groupFile + toString(getpid()) + ".temp"),
881                                                                  tempFASTAFileNames,
882                                                                  tempPrimerQualFileNames,
883                                                                  tempNameFileNames,
884                                                                  lines[process],
885                                                                  qLines[process]);
886                 
887                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
888                                 
889                                 //pass groupCounts to parent
890                                 if(createGroup){
891                                         ofstream out;
892                                         string tempFile = filename + toString(getpid()) + ".num.temp";
893                                         m->openOutputFile(tempFile, out);
894                                         
895                                         out << groupCounts.size() << endl;
896                                         
897                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
898                                                 out << it->first << '\t' << it->second << endl;
899                                         }
900                                         out.close();
901                                 }
902                                 exit(0);
903                         }else { 
904                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
905                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
906                                 exit(0);
907                         }
908                 }
909                 
910                 //parent do my part
911                 ofstream temp;
912                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
913                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
914                 if(qFileName != ""){
915                         m->openOutputFile(trimQualFileName, temp);              temp.close();
916                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
917                 }
918                 if (nameFile != "") {
919                         m->openOutputFile(trimNameFileName, temp);              temp.close();
920                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
921                 }
922
923                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
924                 
925                 //force parent to wait until all the processes are done
926                 for (int i=0;i<processIDS.size();i++) { 
927                         int temp = processIDS[i];
928                         wait(&temp);
929                 }
930 #else
931         //////////////////////////////////////////////////////////////////////////////////////////////////////
932                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
933                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
934                 //////////////////////////////////////////////////////////////////////////////////////////////////////
935                 
936                 vector<trimData*> pDataArray; 
937                 DWORD   dwThreadIdArray[processors-1];
938                 HANDLE  hThreadArray[processors-1]; 
939                 
940                 //Create processor worker threads.
941                 for( int i=0; i<processors-1; i++){
942                         
943             string extension = "";
944                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
945             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
946             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
947             vector<vector<string> > tempNameFileNames = nameFileNames;
948             
949             if(allFiles){
950                 ofstream temp;
951                 
952                 for(int i=0;i<tempFASTAFileNames.size();i++){
953                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
954                         if (tempFASTAFileNames[i][j] != "") {
955                             tempFASTAFileNames[i][j] += extension;
956                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
957                             
958                             if(qFileName != ""){
959                                 tempPrimerQualFileNames[i][j] += extension;
960                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
961                             }
962                             if(nameFile != ""){
963                                 tempNameFileNames[i][j] += extension;
964                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
965                             }
966                         }
967                     }
968                 }
969             }
970
971             
972                         trimData* tempTrim = new trimData(filename,
973                                               qFileName, nameFile,
974                                               (trimFASTAFileName+extension),
975                                               (scrapFASTAFileName+extension),
976                                               (trimQualFileName+extension),
977                                               (scrapQualFileName+extension),
978                                               (trimNameFileName+extension),
979                                               (scrapNameFileName+extension),
980                                               (groupFile+extension),
981                                               tempFASTAFileNames,
982                                               tempPrimerQualFileNames,
983                                               tempNameFileNames,
984                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
985                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
986                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
987                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
988                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
989                         pDataArray.push_back(tempTrim);
990             
991                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
992                 }
993         
994         //parent do my part
995                 ofstream temp;
996                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
997                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
998                 if(qFileName != ""){
999                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1000                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1001                 }
1002                 if (nameFile != "") {
1003                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1004                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1005                 }
1006         
1007                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1008         processIDS.push_back(processors-1);
1009
1010         
1011                 //Wait until all threads have terminated.
1012                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1013                 
1014                 //Close all thread handles and free memory allocations.
1015                 for(int i=0; i < pDataArray.size(); i++){
1016                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1017                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1018                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1019                 else { groupCounts[it->first] += it->second; }
1020             }
1021             CloseHandle(hThreadArray[i]);
1022                         delete pDataArray[i];
1023                 }
1024         
1025 #endif          
1026         
1027         
1028         //append files
1029                 for(int i=0;i<processIDS.size();i++){
1030                         
1031                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1032                         
1033                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1034                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1035                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1036                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1037                         
1038                         if(qFileName != ""){
1039                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1040                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1041                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1042                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1043                         }
1044                         
1045                         if(nameFile != ""){
1046                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1047                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1048                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1049                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1050                         }
1051                         
1052                         if(createGroup){
1053                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1054                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1055                         }
1056                         
1057                         
1058                         if(allFiles){
1059                                 for(int j=0;j<fastaFileNames.size();j++){
1060                                         for(int k=0;k<fastaFileNames[j].size();k++){
1061                                                 if (fastaFileNames[j][k] != "") {
1062                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1063                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1064                                                         
1065                                                         if(qFileName != ""){
1066                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1067                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1068                                                         }
1069                                                         
1070                                                         if(nameFile != ""){
1071                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1072                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1073                                                         }
1074                                                 }
1075                                         }
1076                                 }
1077                         }
1078                         
1079             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1080                         if(createGroup){
1081                                 ifstream in;
1082                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1083                                 m->openInputFile(tempFile, in);
1084                                 int tempNum;
1085                                 string group;
1086                                 
1087                                 in >> tempNum; m->gobble(in);
1088                                 
1089                                 if (tempNum != 0) {
1090                                         while (!in.eof()) { 
1091                                                 in >> group >> tempNum; m->gobble(in);
1092                         
1093                                                 map<string, int>::iterator it = groupCounts.find(group);
1094                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1095                                                 else { groupCounts[it->first] += tempNum; }
1096                                         }
1097                                 }
1098                                 in.close(); m->mothurRemove(tempFile);
1099                         }
1100             #endif
1101                 }
1102
1103         return exitCommand;
1104         }
1105         catch(exception& e) {
1106                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1107                 exit(1);
1108         }
1109 }
1110
1111 /**************************************************************************************************/
1112
1113 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1114         try {
1115         
1116         vector<unsigned long long> fastaFilePos;
1117                 vector<unsigned long long> qfileFilePos;
1118                 
1119                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1120                 //set file positions for fasta file
1121                 fastaFilePos = m->divideFile(filename, processors);
1122                 
1123                 //get name of first sequence in each chunk
1124                 map<string, int> firstSeqNames;
1125                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1126                         ifstream in;
1127                         m->openInputFile(filename, in);
1128                         in.seekg(fastaFilePos[i]);
1129                 
1130                         Sequence temp(in); 
1131                         firstSeqNames[temp.getName()] = i;
1132                 
1133                         in.close();
1134                 }
1135                 
1136                 if(qfilename != "")     {
1137             //seach for filePos of each first name in the qfile and save in qfileFilePos
1138             ifstream inQual;
1139             m->openInputFile(qfilename, inQual);
1140             
1141             string input;
1142             while(!inQual.eof()){       
1143                 input = m->getline(inQual);
1144                 
1145                 if (input.length() != 0) {
1146                     if(input[0] == '>'){ //this is a sequence name line
1147                         istringstream nameStream(input);
1148                         
1149                         string sname = "";  nameStream >> sname;
1150                         sname = sname.substr(1);
1151                         
1152                         map<string, int>::iterator it = firstSeqNames.find(sname);
1153                         
1154                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1155                             unsigned long long pos = inQual.tellg(); 
1156                             qfileFilePos.push_back(pos - input.length() - 1);   
1157                             firstSeqNames.erase(it);
1158                         }
1159                     }
1160                 }
1161                 
1162                 if (firstSeqNames.size() == 0) { break; }
1163             }
1164             inQual.close();
1165             
1166             
1167             if (firstSeqNames.size() != 0) { 
1168                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1169                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1170                 }
1171                 qFileName = "";
1172                 return processors;
1173             }
1174             
1175             //get last file position of qfile
1176             FILE * pFile;
1177             unsigned long long size;
1178             
1179             //get num bytes in file
1180             pFile = fopen (qfilename.c_str(),"rb");
1181             if (pFile==NULL) perror ("Error opening file");
1182             else{
1183                 fseek (pFile, 0, SEEK_END);
1184                 size=ftell (pFile);
1185                 fclose (pFile);
1186             }
1187             
1188             qfileFilePos.push_back(size);
1189         }
1190         
1191         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1192             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1193                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1194                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1195                 }       
1196                 if(qfilename == "")     {       qLines = lines; } //files with duds
1197                 
1198                 return processors;
1199                 
1200                 #else
1201             
1202         if (processors == 1) { //save time
1203                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1204                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1205             lines.push_back(linePair(0, 1000));
1206             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1207         }else{
1208             int numFastaSeqs = 0;
1209             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1210             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1211         
1212             if (qfilename != "") { 
1213                 int numQualSeqs = 0;
1214                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1215                 
1216                 if (numFastaSeqs != numQualSeqs) {
1217                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1218                 }
1219             }
1220         
1221             //figure out how many sequences you have to process
1222             int numSeqsPerProcessor = numFastaSeqs / processors;
1223             for (int i = 0; i < processors; i++) {
1224                 int startIndex =  i * numSeqsPerProcessor;
1225                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1226                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1227                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1228             }
1229         }
1230             if(qfilename == "") {       qLines = lines; } //files with duds
1231                         return 1;
1232                 
1233                 #endif
1234         }
1235         catch(exception& e) {
1236                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1237                 exit(1);
1238         }
1239 }
1240
1241 //***************************************************************************************************************
1242
1243 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1244         try {
1245                 ifstream inOligos;
1246                 m->openInputFile(oligoFile, inOligos);
1247                 
1248                 ofstream test;
1249                 
1250                 string type, oligo, group;
1251
1252                 int indexPrimer = 0;
1253                 int indexBarcode = 0;
1254                 
1255                 while(!inOligos.eof()){
1256
1257                         inOligos >> type; 
1258             
1259                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1260             
1261                         if(type[0] == '#'){
1262                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1263                                 m->gobble(inOligos);
1264                         }
1265                         else{
1266                                 m->gobble(inOligos);
1267                                 //make type case insensitive
1268                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1269                                 
1270                                 inOligos >> oligo;
1271                 
1272                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1273                                 
1274                                 for(int i=0;i<oligo.length();i++){
1275                                         oligo[i] = toupper(oligo[i]);
1276                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1277                                 }
1278                                 
1279                                 if(type == "FORWARD"){
1280                                         group = "";
1281                                         
1282                                         // get rest of line in case there is a primer name
1283                                         while (!inOligos.eof()) {       
1284                                                 char c = inOligos.get(); 
1285                                                 if (c == 10 || c == 13){        break;  }
1286                                                 else if (c == 32 || c == 9){;} //space or tab
1287                                                 else {  group += c;  }
1288                                         } 
1289                                         
1290                                         //check for repeat barcodes
1291                                         map<string, int>::iterator itPrime = primers.find(oligo);
1292                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1293                                         
1294                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1295                     
1296                                         primers[oligo]=indexPrimer; indexPrimer++;              
1297                                         primerNameVector.push_back(group);
1298                                 }
1299                                 else if(type == "REVERSE"){
1300                                         //Sequence oligoRC("reverse", oligo);
1301                                         //oligoRC.reverseComplement();
1302                     string oligoRC = reverseOligo(oligo);
1303                                         revPrimer.push_back(oligoRC);
1304                                 }
1305                                 else if(type == "BARCODE"){
1306                                         inOligos >> group;
1307                     
1308                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1309                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1310                                         string temp = "";
1311                     while (!inOligos.eof())     {       
1312                                                 char c = inOligos.get(); 
1313                                                 if (c == 10 || c == 13){        break;  }
1314                                                 else if (c == 32 || c == 9){;} //space or tab
1315                                                 else {  temp += c;  }
1316                                         } 
1317                                         
1318                     //then this is illumina data with 4 columns
1319                     if (temp != "") {  
1320                         
1321                         for(int i=0;i<group.length();i++){
1322                             group[i] = toupper(group[i]);
1323                             if(group[i] == 'U') {       group[i] = 'T'; }
1324                         }
1325                         
1326                         if (m->debug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); }
1327                         
1328                         string reverseBarcode = reverseOligo(group); //reverse barcode
1329                         //check for repeat barcodes
1330                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1331                         if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine();  }
1332                         
1333                         group = temp;
1334                         rbarcodes[reverseBarcode]=indexBarcode; 
1335                     }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } }
1336                         
1337                                         //check for repeat barcodes
1338                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1339                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1340                                                 
1341                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1342                                         barcodeNameVector.push_back(group);
1343                                 }else if(type == "LINKER"){
1344                                         linker.push_back(oligo);
1345                                 }else if(type == "SPACER"){
1346                                         spacer.push_back(oligo);
1347                                 }
1348                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1349                         }
1350                         m->gobble(inOligos);
1351                 }       
1352                 inOligos.close();
1353                 
1354                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1355                 
1356                 //add in potential combos
1357                 if(barcodeNameVector.size() == 0){
1358                         barcodes[""] = 0;
1359                         barcodeNameVector.push_back("");                        
1360                 }
1361                 
1362                 if(primerNameVector.size() == 0){
1363                         primers[""] = 0;
1364                         primerNameVector.push_back("");                 
1365                 }
1366                 
1367                 fastaFileNames.resize(barcodeNameVector.size());
1368                 for(int i=0;i<fastaFileNames.size();i++){
1369                         fastaFileNames[i].assign(primerNameVector.size(), "");
1370                 }
1371                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1372                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1373                 
1374                 if(allFiles){
1375                         set<string> uniqueNames; //used to cleanup outputFileNames
1376                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1377                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1378                                         
1379                                         string primerName = primerNameVector[itPrimer->second];
1380                                         string barcodeName = barcodeNameVector[itBar->second];
1381                                         
1382                                         string comboGroupName = "";
1383                                         string fastaFileName = "";
1384                                         string qualFileName = "";
1385                                         string nameFileName = "";
1386                                         
1387                                         if(primerName == ""){
1388                                                 comboGroupName = barcodeNameVector[itBar->second];
1389                                         }
1390                                         else{
1391                                                 if(barcodeName == ""){
1392                                                         comboGroupName = primerNameVector[itPrimer->second];
1393                                                 }
1394                                                 else{
1395                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1396                                                 }
1397                                         }
1398                                         
1399                                         
1400                                         ofstream temp;
1401                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1402                                         if (uniqueNames.count(fastaFileName) == 0) {
1403                                                 outputNames.push_back(fastaFileName);
1404                                                 outputTypes["fasta"].push_back(fastaFileName);
1405                                                 uniqueNames.insert(fastaFileName);
1406                                         }
1407                                         
1408                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1409                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1410                                         
1411                                         if(qFileName != ""){
1412                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1413                                                 if (uniqueNames.count(qualFileName) == 0) {
1414                                                         outputNames.push_back(qualFileName);
1415                                                         outputTypes["qfile"].push_back(qualFileName);
1416                                                 }
1417                                                 
1418                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1419                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1420                                         }
1421                                         
1422                                         if(nameFile != ""){
1423                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1424                                                 if (uniqueNames.count(nameFileName) == 0) {
1425                                                         outputNames.push_back(nameFileName);
1426                                                         outputTypes["name"].push_back(nameFileName);
1427                                                 }
1428                                                 
1429                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1430                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1431                                         }
1432                                         
1433                                 }
1434                         }
1435                 }
1436                 numFPrimers = primers.size();
1437                 numRPrimers = revPrimer.size();
1438         numLinkers = linker.size();
1439         numSpacers = spacer.size();
1440                 
1441                 bool allBlank = true;
1442                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1443                         if (barcodeNameVector[i] != "") {
1444                                 allBlank = false;
1445                                 break;
1446                         }
1447                 }
1448                 for (int i = 0; i < primerNameVector.size(); i++) {
1449                         if (primerNameVector[i] != "") {
1450                                 allBlank = false;
1451                                 break;
1452                         }
1453                 }
1454
1455                 if (allBlank) {
1456                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1457                         allFiles = false;
1458                         return false;
1459                 }
1460                 
1461                 return true;
1462                 
1463         }
1464         catch(exception& e) {
1465                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1466                 exit(1);
1467         }
1468 }
1469 //***************************************************************************************************************
1470
1471 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1472         try {
1473                 bool success = 1;
1474                 if(qscores.getName() != ""){
1475                         qscores.trimQScores(-1, keepFirst);
1476                 }
1477                 sequence.trim(keepFirst);
1478                 return success;
1479         }
1480         catch(exception& e) {
1481                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1482                 exit(1);
1483         }
1484         
1485 }       
1486
1487 //***************************************************************************************************************
1488
1489 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1490         try {
1491                 bool success = 0;
1492                 
1493                 int length = sequence.getNumBases() - removeLast;
1494                 
1495                 if(length > 0){
1496                         if(qscores.getName() != ""){
1497                                 qscores.trimQScores(-1, length);
1498                         }
1499                         sequence.trim(length);
1500                         success = 1;
1501                 }
1502                 else{
1503                         success = 0;
1504                 }
1505
1506                 return success;
1507         }
1508         catch(exception& e) {
1509                 m->errorOut(e, "removeLastTrim", "countDiffs");
1510                 exit(1);
1511         }
1512         
1513 }       
1514
1515 //***************************************************************************************************************
1516
1517 bool TrimSeqsCommand::cullLength(Sequence& seq){
1518         try {
1519         
1520                 int length = seq.getNumBases();
1521                 bool success = 0;       //guilty until proven innocent
1522                 
1523                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1524                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1525                 else                                                                                            {       success = 0;    }
1526                 
1527                 return success;
1528         
1529         }
1530         catch(exception& e) {
1531                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1532                 exit(1);
1533         }
1534         
1535 }
1536
1537 //***************************************************************************************************************
1538
1539 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1540         try {
1541                 int longHomoP = seq.getLongHomoPolymer();
1542                 bool success = 0;       //guilty until proven innocent
1543                 
1544                 if(longHomoP <= maxHomoP){      success = 1;    }
1545                 else                                    {       success = 0;    }
1546                 
1547                 return success;
1548         }
1549         catch(exception& e) {
1550                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1551                 exit(1);
1552         }
1553         
1554 }
1555 //********************************************************************/
1556 string TrimSeqsCommand::reverseOligo(string oligo){
1557         try {
1558         string reverse = "";
1559         
1560         for(int i=oligo.length()-1;i>=0;i--){
1561             
1562             if(oligo[i] == 'A')         {       reverse += 'T'; }
1563             else if(oligo[i] == 'T'){   reverse += 'A'; }
1564             else if(oligo[i] == 'U'){   reverse += 'A'; }
1565             
1566             else if(oligo[i] == 'G'){   reverse += 'C'; }
1567             else if(oligo[i] == 'C'){   reverse += 'G'; }
1568             
1569             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1570             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1571             
1572             else if(oligo[i] == 'M'){   reverse += 'K'; }
1573             else if(oligo[i] == 'K'){   reverse += 'M'; }
1574             
1575             else if(oligo[i] == 'W'){   reverse += 'W'; }
1576             else if(oligo[i] == 'S'){   reverse += 'S'; }
1577             
1578             else if(oligo[i] == 'B'){   reverse += 'V'; }
1579             else if(oligo[i] == 'V'){   reverse += 'B'; }
1580             
1581             else if(oligo[i] == 'D'){   reverse += 'H'; }
1582             else if(oligo[i] == 'H'){   reverse += 'D'; }
1583             
1584             else                                                {       reverse += 'N'; }
1585         }
1586         
1587         
1588         return reverse;
1589     }
1590         catch(exception& e) {
1591                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1592                 exit(1);
1593         }
1594 }
1595
1596 //***************************************************************************************************************
1597
1598 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1599         try {
1600                 int numNs = seq.getAmbigBases();
1601                 bool success = 0;       //guilty until proven innocent
1602                 
1603                 if(numNs <= maxAmbig)   {       success = 1;    }
1604                 else                                    {       success = 0;    }
1605                 
1606                 return success;
1607         }
1608         catch(exception& e) {
1609                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1610                 exit(1);
1611         }
1612         
1613 }
1614 //***************************************************************************************************************