]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
102         try {
103         string outputFileName = "";
104                 map<string, vector<string> >::iterator it;
105         
106         //is this a type this command creates
107         it = outputTypes.find(type);
108         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
109         else {
110             if (type == "qfile")            {   outputFileName =  "qual";   }
111             else if (type == "fasta")            {   outputFileName =  "fasta";   }
112             else if (type == "group")            {   outputFileName =  "groups";   }
113             else if (type == "name")            {   outputFileName =  "names";   }
114             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
115         }
116         return outputFileName;
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
120                 exit(1);
121         }
122 }
123
124
125 //**********************************************************************************************************************
126
127 TrimSeqsCommand::TrimSeqsCommand(){     
128         try {
129                 abort = true; calledHelp = true; 
130                 setParameters();
131                 vector<string> tempOutNames;
132                 outputTypes["fasta"] = tempOutNames;
133                 outputTypes["qfile"] = tempOutNames;
134                 outputTypes["group"] = tempOutNames;
135                 outputTypes["name"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174                         
175                         //if the user changes the input directory command factory will send this info to us in the output parameter 
176                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
177                         if (inputDir == "not found"){   inputDir = "";          }
178                         else {
179                                 string path;
180                                 it = parameters.find("fasta");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("oligos");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
194                                 }
195                                 
196                                 it = parameters.find("qfile");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
202                                 }
203                                 
204                                 it = parameters.find("name");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
210                                 }
211                                 
212                         }
213
214                         
215                         //check for required parameters
216                         fastaFile = validParameter.validFile(parameters, "fasta", true);
217                         if (fastaFile == "not found") {                                 
218                                 fastaFile = m->getFastaFile(); 
219                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
220                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
221                         }else if (fastaFile == "not open") { abort = true; }    
222                         else { m->setFastaFile(fastaFile); }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
226                                 outputDir = ""; 
227                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
228                         }
229                 
230                         
231                         //check for optional parameter and set defaults
232                         // ...at some point should added some additional type checking...
233                         string temp;
234                         temp = validParameter.validFile(parameters, "flip", false);
235                         if (temp == "not found")    {   flip = 0;       }
236                         else {  flip = m->isTrue(temp);         }
237                 
238                         temp = validParameter.validFile(parameters, "oligos", true);
239                         if (temp == "not found"){       oligoFile = "";         }
240                         else if(temp == "not open"){    abort = true;   } 
241                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
242                         
243                         
244                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
245                         m->mothurConvert(temp, maxAmbig);  
246
247                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
248                         m->mothurConvert(temp, maxHomoP);  
249
250                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, minLength); 
252                         
253                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
254                         m->mothurConvert(temp, maxLength);
255                         
256                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, bdiffs);
258                         
259                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, pdiffs);
261             
262             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, ldiffs);
264             
265             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, sdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
269                         m->mothurConvert(temp, tdiffs);
270                         
271                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
272                         
273                         temp = validParameter.validFile(parameters, "qfile", true);     
274                         if (temp == "not found")        {       qFileName = "";         }
275                         else if(temp == "not open")     {       abort = true;           }
276                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
277                         
278                         temp = validParameter.validFile(parameters, "name", true);      
279                         if (temp == "not found")        {       nameFile = "";          }
280                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
281                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
282                         
283                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
284                         m->mothurConvert(temp, qThreshold);
285                         
286                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
287                         qtrim = m->isTrue(temp);
288
289                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
290                         convert(temp, qRollAverage);
291
292                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
293                         convert(temp, qWindowAverage);
294
295                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
296                         convert(temp, qWindowSize);
297
298                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
299                         convert(temp, qWindowStep);
300
301                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
302                         convert(temp, qAverage);
303
304                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
305                         convert(temp, keepFirst);
306
307                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
308                         convert(temp, removeLast);
309                         
310                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
311                         allFiles = m->isTrue(temp);
312             
313             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
314                         keepforward = m->isTrue(temp);
315                         
316                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
317                         m->setProcessors(temp);
318                         m->mothurConvert(temp, processors); 
319                         
320                         
321                         if(allFiles && (oligoFile == "")){
322                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
323                         }
324                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
325                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
326                                 qAverage=0;
327                                 qThreshold=0;
328                         }
329                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
330                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
331                                 abort = true;
332                         }
333                         
334                         if (nameFile == "") {
335                                 vector<string> files; files.push_back(fastaFile);
336                                 parser.getNameFile(files);
337                         }
338                 }
339
340         }
341         catch(exception& e) {
342                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
343                 exit(1);
344         }
345 }
346 //***************************************************************************************************************
347
348 int TrimSeqsCommand::execute(){
349         try{
350         
351                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
352                 
353                 numFPrimers = 0;  //this needs to be initialized
354                 numRPrimers = 0;
355         numSpacers = 0;
356         numLinkers = 0;
357                 createGroup = false;
358                 vector<vector<string> > fastaFileNames;
359                 vector<vector<string> > qualFileNames;
360                 vector<vector<string> > nameFileNames;
361                 
362                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
363                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
364                 
365                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
366                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
367                 
368                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
369                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
370                 
371                 if (qFileName != "") {
372                         outputNames.push_back(trimQualFile);
373                         outputNames.push_back(scrapQualFile);
374                         outputTypes["qfile"].push_back(trimQualFile);
375                         outputTypes["qfile"].push_back(scrapQualFile); 
376                 }
377                 
378                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
379                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
380                 
381                 if (nameFile != "") {
382                         m->readNames(nameFile, nameMap);
383                         outputNames.push_back(trimNameFile);
384                         outputNames.push_back(scrapNameFile);
385                         outputTypes["name"].push_back(trimNameFile);
386                         outputTypes["name"].push_back(scrapNameFile); 
387                 }
388                 
389                 if (m->control_pressed) { return 0; }
390                 
391                 string outputGroupFileName;
392                 if(oligoFile != ""){
393                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
394                         if (createGroup) {
395                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
396                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
397                         }
398                 }
399        
400         //fills lines and qlines
401                 setLines(fastaFile, qFileName);
402                 
403         if(processors == 1){
404             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
405         }else{
406             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
407         }       
408                 
409                 
410                 if (m->control_pressed) {  return 0; }                  
411         
412                 if(allFiles){
413                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
414                         map<string, string>::iterator it;
415                         set<string> namesToRemove;
416                         for(int i=0;i<fastaFileNames.size();i++){
417                                 for(int j=0;j<fastaFileNames[0].size();j++){
418                                         if (fastaFileNames[i][j] != "") {
419                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
420                                                         if(m->isBlank(fastaFileNames[i][j])){
421                                                                 m->mothurRemove(fastaFileNames[i][j]);
422                                                                 namesToRemove.insert(fastaFileNames[i][j]);
423                                                         
424                                                                 if(qFileName != ""){
425                                                                         m->mothurRemove(qualFileNames[i][j]);
426                                                                         namesToRemove.insert(qualFileNames[i][j]);
427                                                                 }
428                                                                 
429                                                                 if(nameFile != ""){
430                                                                         m->mothurRemove(nameFileNames[i][j]);
431                                                                         namesToRemove.insert(nameFileNames[i][j]);
432                                                                 }
433                                                         }else{  
434                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
435                                                                 if (it == uniqueFastaNames.end()) {     
436                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
437                                                                 }       
438                                                         }
439                                                 }
440                                         }
441                                 }
442                         }
443                         
444                         //remove names for outputFileNames, just cleans up the output
445                         vector<string> outputNames2;
446                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
447                         outputNames = outputNames2;
448                         
449                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
450                                 ifstream in;
451                                 m->openInputFile(it->first, in);
452                                 
453                                 ofstream out;
454                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + getOutputFileNameTag("group");
455                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
456                                 m->openOutputFile(thisGroupName, out);
457                                 
458                                 while (!in.eof()){
459                                         if (m->control_pressed) { break; }
460                                         
461                                         Sequence currSeq(in); m->gobble(in);
462                                         out << currSeq.getName() << '\t' << it->second << endl;
463                     
464                     if (nameFile != "") {
465                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
466                         if (itName != nameMap.end()) { 
467                             vector<string> thisSeqsNames; 
468                             m->splitAtChar(itName->second, thisSeqsNames, ',');
469                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
470                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
471                             }
472                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
473                     }
474                                 }
475                                 in.close();
476                                 out.close();
477                         }
478                 }
479                 
480                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
481
482                 //output group counts
483                 m->mothurOutEndLine();
484                 int total = 0;
485                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
486                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
487                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
488                 }
489                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
490                 
491                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
492
493                 //set fasta file as new current fastafile
494                 string current = "";
495                 itTypes = outputTypes.find("fasta");
496                 if (itTypes != outputTypes.end()) {
497                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
498                 }
499                 
500                 itTypes = outputTypes.find("name");
501                 if (itTypes != outputTypes.end()) {
502                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
503                 }
504                 
505                 itTypes = outputTypes.find("qfile");
506                 if (itTypes != outputTypes.end()) {
507                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
508                 }
509                 
510                 itTypes = outputTypes.find("group");
511                 if (itTypes != outputTypes.end()) {
512                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
513                 }
514
515                 m->mothurOutEndLine();
516                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
518                 m->mothurOutEndLine();
519                 
520                 return 0;       
521                         
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "TrimSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528                 
529 /**************************************************************************************/
530
531 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
532                 
533         try {
534                 
535                 ofstream trimFASTAFile;
536                 m->openOutputFile(trimFileName, trimFASTAFile);
537                 
538                 ofstream scrapFASTAFile;
539                 m->openOutputFile(scrapFileName, scrapFASTAFile);
540                 
541                 ofstream trimQualFile;
542                 ofstream scrapQualFile;
543                 if(qFileName != ""){
544                         m->openOutputFile(trimQFileName, trimQualFile);
545                         m->openOutputFile(scrapQFileName, scrapQualFile);
546                 }
547                 
548                 ofstream trimNameFile;
549                 ofstream scrapNameFile;
550                 if(nameFile != ""){
551                         m->openOutputFile(trimNFileName, trimNameFile);
552                         m->openOutputFile(scrapNFileName, scrapNameFile);
553                 }
554                 
555                 
556                 ofstream outGroupsFile;
557                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
558                 if(allFiles){
559                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
560                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
561                                         if (fastaFileNames[i][j] != "") {
562                                                 ofstream temp;
563                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
564                                                 if(qFileName != ""){
565                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
566                                                 }
567                                                 
568                                                 if(nameFile != ""){
569                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
570                                                 }
571                                         }
572                                 }
573                         }
574                 }
575                 
576                 ifstream inFASTA;
577                 m->openInputFile(filename, inFASTA);
578                 inFASTA.seekg(line.start);
579                 
580                 ifstream qFile;
581                 if(qFileName != "")     {
582                         m->openInputFile(qFileName, qFile);
583                         qFile.seekg(qline.start);  
584                 }
585                 
586                 int count = 0;
587                 bool moreSeqs = 1;
588                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
589         
590                 while (moreSeqs) {
591                                 
592                         if (m->control_pressed) { 
593                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
594                                 if (createGroup) {       outGroupsFile.close();   }
595
596                                 if(qFileName != ""){
597                                         qFile.close();
598                                 }
599                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
600
601                                 return 0;
602                         }
603                         
604                         int success = 1;
605                         string trashCode = "";
606                         int currentSeqsDiffs = 0;
607
608                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
609                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
610             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(count) + " fasta = " + currSeq.getName() + '\n'); }
611                         QualityScores currQual;
612                         if(qFileName != ""){
613                                 currQual = QualityScores(qFile);  m->gobble(qFile);
614                  if (m->debug) { m->mothurOut("[DEBUG]: qual = " + currQual.getName() + '\n'); }
615                         }
616                         
617                         string origSeq = currSeq.getUnaligned();
618                         if (origSeq != "") {
619                                 
620                                 int barcodeIndex = 0;
621                                 int primerIndex = 0;
622                                 
623                 if(numLinkers != 0){
624                                         success = trimOligos.stripLinker(currSeq, currQual);
625                                         if(success > ldiffs)            {       trashCode += 'k';       }
626                                         else{ currentSeqsDiffs += success;  }
627
628                                 }
629                 
630                                 if(barcodes.size() != 0){
631                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
632                                         if(success > bdiffs)            {       trashCode += 'b';       }
633                                         else{ currentSeqsDiffs += success;  }
634                                 }
635                 
636                 if(rbarcodes.size() != 0){
637                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
638                                         if(success > bdiffs)            {       trashCode += 'b';       }
639                                         else{ currentSeqsDiffs += success;  }
640                                 }
641                                 
642                 if(numSpacers != 0){
643                                         success = trimOligos.stripSpacer(currSeq, currQual);
644                                         if(success > sdiffs)            {       trashCode += 's';       }
645                                         else{ currentSeqsDiffs += success;  }
646
647                                 }
648                 
649                                 if(numFPrimers != 0){
650                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
651                                         if(success > pdiffs)            {       trashCode += 'f';       }
652                                         else{ currentSeqsDiffs += success;  }
653                                 }
654                                 
655                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
656                                 
657                                 if(numRPrimers != 0){
658                                         success = trimOligos.stripReverse(currSeq, currQual);
659                                         if(!success)                            {       trashCode += 'r';       }
660                                 }
661
662                                 if(keepFirst != 0){
663                                         success = keepFirstTrim(currSeq, currQual);
664                                 }
665                                 
666                                 if(removeLast != 0){
667                                         success = removeLastTrim(currSeq, currQual);
668                                         if(!success)                            {       trashCode += 'l';       }
669                                 }
670
671                                 
672                                 if(qFileName != ""){
673                                         int origLength = currSeq.getNumBases();
674                                         
675                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
676                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
677                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
678                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
679                                         else                                            {       success = 1;                            }
680                                         
681                                         //you don't want to trim, if it fails above then scrap it
682                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
683                                         
684                                         if(!success)                            {       trashCode += 'q';       }
685                                 }                               
686                 
687                                 if(minLength > 0 || maxLength > 0){
688                                         success = cullLength(currSeq);
689                                         if(!success)                            {       trashCode += 'l';       }
690                                 }
691                                 if(maxHomoP > 0){
692                                         success = cullHomoP(currSeq);
693                                         if(!success)                            {       trashCode += 'h';       }
694                                 }
695                                 if(maxAmbig != -1){
696                                         success = cullAmbigs(currSeq);
697                                         if(!success)                            {       trashCode += 'n';       }
698                                 }
699                                 
700                                 if(flip){               // should go last                       
701                                         currSeq.reverseComplement();
702                                         if(qFileName != ""){
703                                                 currQual.flipQScores(); 
704                                         }
705                                 }
706                                 
707                                 if(trashCode.length() == 0){
708                                         currSeq.setAligned(currSeq.getUnaligned());
709                                         currSeq.printSequence(trimFASTAFile);
710                                         
711                                         if(qFileName != ""){
712                                                 currQual.printQScores(trimQualFile);
713                                         }
714                                         
715                     
716                                         if(nameFile != ""){
717                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
718                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
719                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
720                                         }
721                                         
722                                         if (createGroup) {
723                                                 if(barcodes.size() != 0){
724                                                         string thisGroup = barcodeNameVector[barcodeIndex];
725                                                         if (primers.size() != 0) { 
726                                                                 if (primerNameVector[primerIndex] != "") { 
727                                                                         if(thisGroup != "") {
728                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
729                                                                         }else {
730                                                                                 thisGroup = primerNameVector[primerIndex]; 
731                                                                         }
732                                                                 } 
733                                                         }
734                                                         
735                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
736                                                         
737                             int numRedundants = 0;
738                                                         if (nameFile != "") {
739                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
740                                                                 if (itName != nameMap.end()) { 
741                                                                         vector<string> thisSeqsNames; 
742                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
743                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
744                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
745                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
746                                                                         }
747                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
748                                                         }
749                                                         
750                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
751                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
752                                                         else { groupCounts[it->first] += (1 + numRedundants); }
753                                                                 
754                                                 }
755                                         }
756                                         
757                                         if(allFiles){
758                                                 ofstream output;
759                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
760                                                 currSeq.printSequence(output);
761                                                 output.close();
762                                                 
763                                                 if(qFileName != ""){
764                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
765                                                         currQual.printQScores(output);
766                                                         output.close();                                                 
767                                                 }
768                                                 
769                                                 if(nameFile != ""){
770                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
771                                                         if (itName != nameMap.end()) { 
772                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
773                                                                 output << itName->first << '\t' << itName->second << endl; 
774                                                                 output.close();
775                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
776                                                 }
777                                         }
778                                 }
779                                 else{
780                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
781                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
782                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
783                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
784                                         }
785                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
786                                         currSeq.setUnaligned(origSeq);
787                                         currSeq.setAligned(origSeq);
788                                         currSeq.printSequence(scrapFASTAFile);
789                                         if(qFileName != ""){
790                                                 currQual.printQScores(scrapQualFile);
791                                         }
792                                 }
793                                 count++;
794                         }
795                         
796                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
797                                 unsigned long long pos = inFASTA.tellg();
798                                 if ((pos == -1) || (pos >= line.end)) { break; }
799                         
800                         #else
801                                 if (inFASTA.eof()) { break; }
802                         #endif
803                         
804                         //report progress
805                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
806                         
807                 }
808                 //report progress
809                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
810                 
811                 
812                 inFASTA.close();
813                 trimFASTAFile.close();
814                 scrapFASTAFile.close();
815                 if (createGroup) {       outGroupsFile.close();   }
816                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
817                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
818                 
819                 return count;
820         }
821         catch(exception& e) {
822                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
823                 exit(1);
824         }
825 }
826
827 /**************************************************************************************************/
828
829 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
830         try {
831         
832         int process = 1;
833                 int exitCommand = 1;
834                 processIDS.clear();
835                 
836 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
837                                 //loop through and create all the processes you want
838                 while (process != processors) {
839                         int pid = fork();
840                         
841                         if (pid > 0) {
842                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
843                                 process++;
844                         }else if (pid == 0){
845                                 
846                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
847                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
848                                 vector<vector<string> > tempNameFileNames = nameFileNames;
849
850                                 if(allFiles){
851                                         ofstream temp;
852
853                                         for(int i=0;i<tempFASTAFileNames.size();i++){
854                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
855                                                         if (tempFASTAFileNames[i][j] != "") {
856                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
857                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
858
859                                                                 if(qFileName != ""){
860                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
861                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
862                                                                 }
863                                                                 if(nameFile != ""){
864                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
865                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
866                                                                 }
867                                                         }
868                                                 }
869                                         }
870                                 }
871                                                         
872                                 driverCreateTrim(filename,
873                                                                  qFileName,
874                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
875                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
876                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
877                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
878                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
879                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
880                                                                  (groupFile + toString(getpid()) + ".temp"),
881                                                                  tempFASTAFileNames,
882                                                                  tempPrimerQualFileNames,
883                                                                  tempNameFileNames,
884                                                                  lines[process],
885                                                                  qLines[process]);
886                                 
887                                 //pass groupCounts to parent
888                                 if(createGroup){
889                                         ofstream out;
890                                         string tempFile = filename + toString(getpid()) + ".num.temp";
891                                         m->openOutputFile(tempFile, out);
892                                         
893                                         out << groupCounts.size() << endl;
894                                         
895                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
896                                                 out << it->first << '\t' << it->second << endl;
897                                         }
898                                         out.close();
899                                 }
900                                 exit(0);
901                         }else { 
902                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
903                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
904                                 exit(0);
905                         }
906                 }
907                 
908                 //parent do my part
909                 ofstream temp;
910                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
911                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
912                 if(qFileName != ""){
913                         m->openOutputFile(trimQualFileName, temp);              temp.close();
914                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
915                 }
916                 if (nameFile != "") {
917                         m->openOutputFile(trimNameFileName, temp);              temp.close();
918                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
919                 }
920
921                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
922                 
923                 //force parent to wait until all the processes are done
924                 for (int i=0;i<processIDS.size();i++) { 
925                         int temp = processIDS[i];
926                         wait(&temp);
927                 }
928 #else
929         //////////////////////////////////////////////////////////////////////////////////////////////////////
930                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
931                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
932                 //////////////////////////////////////////////////////////////////////////////////////////////////////
933                 
934                 vector<trimData*> pDataArray; 
935                 DWORD   dwThreadIdArray[processors-1];
936                 HANDLE  hThreadArray[processors-1]; 
937                 
938                 //Create processor worker threads.
939                 for( int i=0; i<processors-1; i++){
940                         
941             string extension = "";
942                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
943             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
944             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
945             vector<vector<string> > tempNameFileNames = nameFileNames;
946             
947             if(allFiles){
948                 ofstream temp;
949                 
950                 for(int i=0;i<tempFASTAFileNames.size();i++){
951                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
952                         if (tempFASTAFileNames[i][j] != "") {
953                             tempFASTAFileNames[i][j] += extension;
954                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
955                             
956                             if(qFileName != ""){
957                                 tempPrimerQualFileNames[i][j] += extension;
958                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
959                             }
960                             if(nameFile != ""){
961                                 tempNameFileNames[i][j] += extension;
962                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
963                             }
964                         }
965                     }
966                 }
967             }
968
969             
970                         trimData* tempTrim = new trimData(filename,
971                                               qFileName, nameFile,
972                                               (trimFASTAFileName+extension),
973                                               (scrapFASTAFileName+extension),
974                                               (trimQualFileName+extension),
975                                               (scrapQualFileName+extension),
976                                               (trimNameFileName+extension),
977                                               (scrapNameFileName+extension),
978                                               (groupFile+extension),
979                                               tempFASTAFileNames,
980                                               tempPrimerQualFileNames,
981                                               tempNameFileNames,
982                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
983                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
984                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
985                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
986                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
987                         pDataArray.push_back(tempTrim);
988             
989                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
990                 }
991         
992         //parent do my part
993                 ofstream temp;
994                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
995                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
996                 if(qFileName != ""){
997                         m->openOutputFile(trimQualFileName, temp);              temp.close();
998                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
999                 }
1000                 if (nameFile != "") {
1001                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1002                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1003                 }
1004         
1005                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1006         processIDS.push_back(processors-1);
1007
1008         
1009                 //Wait until all threads have terminated.
1010                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1011                 
1012                 //Close all thread handles and free memory allocations.
1013                 for(int i=0; i < pDataArray.size(); i++){
1014                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1015                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1016                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1017                 else { groupCounts[it->first] += it->second; }
1018             }
1019             CloseHandle(hThreadArray[i]);
1020                         delete pDataArray[i];
1021                 }
1022         
1023 #endif          
1024         
1025         
1026         //append files
1027                 for(int i=0;i<processIDS.size();i++){
1028                         
1029                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1030                         
1031                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1032                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1033                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1034                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1035                         
1036                         if(qFileName != ""){
1037                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1038                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1039                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1040                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1041                         }
1042                         
1043                         if(nameFile != ""){
1044                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1045                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1046                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1047                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1048                         }
1049                         
1050                         if(createGroup){
1051                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1052                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1053                         }
1054                         
1055                         
1056                         if(allFiles){
1057                                 for(int j=0;j<fastaFileNames.size();j++){
1058                                         for(int k=0;k<fastaFileNames[j].size();k++){
1059                                                 if (fastaFileNames[j][k] != "") {
1060                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1061                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1062                                                         
1063                                                         if(qFileName != ""){
1064                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1065                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1066                                                         }
1067                                                         
1068                                                         if(nameFile != ""){
1069                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1070                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1071                                                         }
1072                                                 }
1073                                         }
1074                                 }
1075                         }
1076                         
1077             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1078                         if(createGroup){
1079                                 ifstream in;
1080                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1081                                 m->openInputFile(tempFile, in);
1082                                 int tempNum;
1083                                 string group;
1084                                 
1085                                 in >> tempNum; m->gobble(in);
1086                                 
1087                                 if (tempNum != 0) {
1088                                         while (!in.eof()) { 
1089                                                 in >> group >> tempNum; m->gobble(in);
1090                         
1091                                                 map<string, int>::iterator it = groupCounts.find(group);
1092                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1093                                                 else { groupCounts[it->first] += tempNum; }
1094                                         }
1095                                 }
1096                                 in.close(); m->mothurRemove(tempFile);
1097                         }
1098             #endif
1099                 }
1100
1101         return exitCommand;
1102         }
1103         catch(exception& e) {
1104                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1105                 exit(1);
1106         }
1107 }
1108
1109 /**************************************************************************************************/
1110
1111 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1112         try {
1113         
1114         vector<unsigned long long> fastaFilePos;
1115                 vector<unsigned long long> qfileFilePos;
1116                 
1117                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1118                 //set file positions for fasta file
1119                 fastaFilePos = m->divideFile(filename, processors);
1120                 
1121                 //get name of first sequence in each chunk
1122                 map<string, int> firstSeqNames;
1123                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1124                         ifstream in;
1125                         m->openInputFile(filename, in);
1126                         in.seekg(fastaFilePos[i]);
1127                 
1128                         Sequence temp(in); 
1129                         firstSeqNames[temp.getName()] = i;
1130                 
1131                         in.close();
1132                 }
1133                 
1134                 if(qfilename != "")     {
1135             //seach for filePos of each first name in the qfile and save in qfileFilePos
1136             ifstream inQual;
1137             m->openInputFile(qfilename, inQual);
1138             
1139             string input;
1140             while(!inQual.eof()){       
1141                 input = m->getline(inQual);
1142                 
1143                 if (input.length() != 0) {
1144                     if(input[0] == '>'){ //this is a sequence name line
1145                         istringstream nameStream(input);
1146                         
1147                         string sname = "";  nameStream >> sname;
1148                         sname = sname.substr(1);
1149                         
1150                         map<string, int>::iterator it = firstSeqNames.find(sname);
1151                         
1152                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1153                             unsigned long long pos = inQual.tellg(); 
1154                             qfileFilePos.push_back(pos - input.length() - 1);   
1155                             firstSeqNames.erase(it);
1156                         }
1157                     }
1158                 }
1159                 
1160                 if (firstSeqNames.size() == 0) { break; }
1161             }
1162             inQual.close();
1163             
1164             
1165             if (firstSeqNames.size() != 0) { 
1166                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1167                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1168                 }
1169                 qFileName = "";
1170                 return processors;
1171             }
1172             
1173             //get last file position of qfile
1174             FILE * pFile;
1175             unsigned long long size;
1176             
1177             //get num bytes in file
1178             pFile = fopen (qfilename.c_str(),"rb");
1179             if (pFile==NULL) perror ("Error opening file");
1180             else{
1181                 fseek (pFile, 0, SEEK_END);
1182                 size=ftell (pFile);
1183                 fclose (pFile);
1184             }
1185             
1186             qfileFilePos.push_back(size);
1187         }
1188         
1189         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1190             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1191                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1192                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1193                 }       
1194                 if(qfilename == "")     {       qLines = lines; } //files with duds
1195                 
1196                 return processors;
1197                 
1198                 #else
1199             
1200         if (processors == 1) { //save time
1201                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1202                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1203             lines.push_back(linePair(0, 1000));
1204             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1205         }else{
1206             int numFastaSeqs = 0;
1207             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1208             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1209         
1210             if (qfilename != "") { 
1211                 int numQualSeqs = 0;
1212                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1213                 
1214                 if (numFastaSeqs != numQualSeqs) {
1215                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1216                 }
1217             }
1218         
1219             //figure out how many sequences you have to process
1220             int numSeqsPerProcessor = numFastaSeqs / processors;
1221             for (int i = 0; i < processors; i++) {
1222                 int startIndex =  i * numSeqsPerProcessor;
1223                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1224                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1225                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1226             }
1227         }
1228             if(qfilename == "") {       qLines = lines; } //files with duds
1229                         return 1;
1230                 
1231                 #endif
1232         }
1233         catch(exception& e) {
1234                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1235                 exit(1);
1236         }
1237 }
1238
1239 //***************************************************************************************************************
1240
1241 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1242         try {
1243                 ifstream inOligos;
1244                 m->openInputFile(oligoFile, inOligos);
1245                 
1246                 ofstream test;
1247                 
1248                 string type, oligo, group;
1249
1250                 int indexPrimer = 0;
1251                 int indexBarcode = 0;
1252                 
1253                 while(!inOligos.eof()){
1254
1255                         inOligos >> type; 
1256                                         
1257                         if(type[0] == '#'){
1258                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1259                                 m->gobble(inOligos);
1260                         }
1261                         else{
1262                                 m->gobble(inOligos);
1263                                 //make type case insensitive
1264                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1265                                 
1266                                 inOligos >> oligo;
1267                                 
1268                                 for(int i=0;i<oligo.length();i++){
1269                                         oligo[i] = toupper(oligo[i]);
1270                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1271                                 }
1272                                 
1273                                 if(type == "FORWARD"){
1274                                         group = "";
1275                                         
1276                                         // get rest of line in case there is a primer name
1277                                         while (!inOligos.eof()) {       
1278                                                 char c = inOligos.get(); 
1279                                                 if (c == 10 || c == 13){        break;  }
1280                                                 else if (c == 32 || c == 9){;} //space or tab
1281                                                 else {  group += c;  }
1282                                         } 
1283                                         
1284                                         //check for repeat barcodes
1285                                         map<string, int>::iterator itPrime = primers.find(oligo);
1286                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1287                                         
1288                                         primers[oligo]=indexPrimer; indexPrimer++;              
1289                                         primerNameVector.push_back(group);
1290                                 }
1291                                 else if(type == "REVERSE"){
1292                                         //Sequence oligoRC("reverse", oligo);
1293                                         //oligoRC.reverseComplement();
1294                     string oligoRC = reverseOligo(oligo);
1295                                         revPrimer.push_back(oligoRC);
1296                                 }
1297                                 else if(type == "BARCODE"){
1298                                         inOligos >> group;
1299                     
1300                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1301                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1302                                         string temp = "";
1303                     while (!inOligos.eof())     {       
1304                                                 char c = inOligos.get(); 
1305                                                 if (c == 10 || c == 13){        break;  }
1306                                                 else if (c == 32 || c == 9){;} //space or tab
1307                                                 else {  temp += c;  }
1308                                         } 
1309                                         
1310                     //then this is illumina data with 4 columns
1311                     if (temp != "") {  
1312                         string reverseBarcode = reverseOligo(group); //reverse barcode
1313                         group = temp;
1314                         
1315                         //check for repeat barcodes
1316                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1317                         if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine();  }
1318                                                 
1319                         rbarcodes[reverseBarcode]=indexBarcode; 
1320                     }
1321                         
1322                                         //check for repeat barcodes
1323                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1324                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1325                                                 
1326                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1327                                         barcodeNameVector.push_back(group);
1328                                 }else if(type == "LINKER"){
1329                                         linker.push_back(oligo);
1330                                 }else if(type == "SPACER"){
1331                                         spacer.push_back(oligo);
1332                                 }
1333                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1334                         }
1335                         m->gobble(inOligos);
1336                 }       
1337                 inOligos.close();
1338                 
1339                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1340                 
1341                 //add in potential combos
1342                 if(barcodeNameVector.size() == 0){
1343                         barcodes[""] = 0;
1344                         barcodeNameVector.push_back("");                        
1345                 }
1346                 
1347                 if(primerNameVector.size() == 0){
1348                         primers[""] = 0;
1349                         primerNameVector.push_back("");                 
1350                 }
1351                 
1352                 fastaFileNames.resize(barcodeNameVector.size());
1353                 for(int i=0;i<fastaFileNames.size();i++){
1354                         fastaFileNames[i].assign(primerNameVector.size(), "");
1355                 }
1356                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1357                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1358                 
1359                 if(allFiles){
1360                         set<string> uniqueNames; //used to cleanup outputFileNames
1361                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1362                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1363                                         
1364                                         string primerName = primerNameVector[itPrimer->second];
1365                                         string barcodeName = barcodeNameVector[itBar->second];
1366                                         
1367                                         string comboGroupName = "";
1368                                         string fastaFileName = "";
1369                                         string qualFileName = "";
1370                                         string nameFileName = "";
1371                                         
1372                                         if(primerName == ""){
1373                                                 comboGroupName = barcodeNameVector[itBar->second];
1374                                         }
1375                                         else{
1376                                                 if(barcodeName == ""){
1377                                                         comboGroupName = primerNameVector[itPrimer->second];
1378                                                 }
1379                                                 else{
1380                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1381                                                 }
1382                                         }
1383                                         
1384                                         
1385                                         ofstream temp;
1386                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1387                                         if (uniqueNames.count(fastaFileName) == 0) {
1388                                                 outputNames.push_back(fastaFileName);
1389                                                 outputTypes["fasta"].push_back(fastaFileName);
1390                                                 uniqueNames.insert(fastaFileName);
1391                                         }
1392                                         
1393                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1394                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1395                                         
1396                                         if(qFileName != ""){
1397                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1398                                                 if (uniqueNames.count(qualFileName) == 0) {
1399                                                         outputNames.push_back(qualFileName);
1400                                                         outputTypes["qfile"].push_back(qualFileName);
1401                                                 }
1402                                                 
1403                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1404                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1405                                         }
1406                                         
1407                                         if(nameFile != ""){
1408                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1409                                                 if (uniqueNames.count(nameFileName) == 0) {
1410                                                         outputNames.push_back(nameFileName);
1411                                                         outputTypes["name"].push_back(nameFileName);
1412                                                 }
1413                                                 
1414                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1415                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1416                                         }
1417                                         
1418                                 }
1419                         }
1420                 }
1421                 numFPrimers = primers.size();
1422                 numRPrimers = revPrimer.size();
1423         numLinkers = linker.size();
1424         numSpacers = spacer.size();
1425                 
1426                 bool allBlank = true;
1427                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1428                         if (barcodeNameVector[i] != "") {
1429                                 allBlank = false;
1430                                 break;
1431                         }
1432                 }
1433                 for (int i = 0; i < primerNameVector.size(); i++) {
1434                         if (primerNameVector[i] != "") {
1435                                 allBlank = false;
1436                                 break;
1437                         }
1438                 }
1439                 
1440                 if (allBlank) {
1441                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1442                         allFiles = false;
1443                         return false;
1444                 }
1445                 
1446                 return true;
1447                 
1448         }
1449         catch(exception& e) {
1450                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1451                 exit(1);
1452         }
1453 }
1454 //***************************************************************************************************************
1455
1456 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1457         try {
1458                 bool success = 1;
1459                 if(qscores.getName() != ""){
1460                         qscores.trimQScores(-1, keepFirst);
1461                 }
1462                 sequence.trim(keepFirst);
1463                 return success;
1464         }
1465         catch(exception& e) {
1466                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1467                 exit(1);
1468         }
1469         
1470 }       
1471
1472 //***************************************************************************************************************
1473
1474 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1475         try {
1476                 bool success = 0;
1477                 
1478                 int length = sequence.getNumBases() - removeLast;
1479                 
1480                 if(length > 0){
1481                         if(qscores.getName() != ""){
1482                                 qscores.trimQScores(-1, length);
1483                         }
1484                         sequence.trim(length);
1485                         success = 1;
1486                 }
1487                 else{
1488                         success = 0;
1489                 }
1490
1491                 return success;
1492         }
1493         catch(exception& e) {
1494                 m->errorOut(e, "removeLastTrim", "countDiffs");
1495                 exit(1);
1496         }
1497         
1498 }       
1499
1500 //***************************************************************************************************************
1501
1502 bool TrimSeqsCommand::cullLength(Sequence& seq){
1503         try {
1504         
1505                 int length = seq.getNumBases();
1506                 bool success = 0;       //guilty until proven innocent
1507                 
1508                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1509                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1510                 else                                                                                            {       success = 0;    }
1511                 
1512                 return success;
1513         
1514         }
1515         catch(exception& e) {
1516                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1517                 exit(1);
1518         }
1519         
1520 }
1521
1522 //***************************************************************************************************************
1523
1524 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1525         try {
1526                 int longHomoP = seq.getLongHomoPolymer();
1527                 bool success = 0;       //guilty until proven innocent
1528                 
1529                 if(longHomoP <= maxHomoP){      success = 1;    }
1530                 else                                    {       success = 0;    }
1531                 
1532                 return success;
1533         }
1534         catch(exception& e) {
1535                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1536                 exit(1);
1537         }
1538         
1539 }
1540 //********************************************************************/
1541 string TrimSeqsCommand::reverseOligo(string oligo){
1542         try {
1543         string reverse = "";
1544         
1545         for(int i=oligo.length()-1;i>=0;i--){
1546             
1547             if(oligo[i] == 'A')         {       reverse += 'T'; }
1548             else if(oligo[i] == 'T'){   reverse += 'A'; }
1549             else if(oligo[i] == 'U'){   reverse += 'A'; }
1550             
1551             else if(oligo[i] == 'G'){   reverse += 'C'; }
1552             else if(oligo[i] == 'C'){   reverse += 'G'; }
1553             
1554             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1555             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1556             
1557             else if(oligo[i] == 'M'){   reverse += 'K'; }
1558             else if(oligo[i] == 'K'){   reverse += 'M'; }
1559             
1560             else if(oligo[i] == 'W'){   reverse += 'W'; }
1561             else if(oligo[i] == 'S'){   reverse += 'S'; }
1562             
1563             else if(oligo[i] == 'B'){   reverse += 'V'; }
1564             else if(oligo[i] == 'V'){   reverse += 'B'; }
1565             
1566             else if(oligo[i] == 'D'){   reverse += 'H'; }
1567             else if(oligo[i] == 'H'){   reverse += 'D'; }
1568             
1569             else                                                {       reverse += 'N'; }
1570         }
1571         
1572         
1573         return reverse;
1574     }
1575         catch(exception& e) {
1576                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1577                 exit(1);
1578         }
1579 }
1580
1581 //***************************************************************************************************************
1582
1583 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1584         try {
1585                 int numNs = seq.getAmbigBases();
1586                 bool success = 0;       //guilty until proven innocent
1587                 
1588                 if(numNs <= maxAmbig)   {       success = 1;    }
1589                 else                                    {       success = 0;    }
1590                 
1591                 return success;
1592         }
1593         catch(exception& e) {
1594                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1595                 exit(1);
1596         }
1597         
1598 }
1599 //***************************************************************************************************************