]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added pcr.seqs command. fixed bug in rarefacftion.single that caused parsing error...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332                 createGroup = false;
333                 vector<vector<string> > fastaFileNames;
334                 vector<vector<string> > qualFileNames;
335                 vector<vector<string> > nameFileNames;
336                 
337                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
338                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
339                 
340                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
341                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
342                 
343                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
344                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
345                 
346                 if (qFileName != "") {
347                         outputNames.push_back(trimQualFile);
348                         outputNames.push_back(scrapQualFile);
349                         outputTypes["qfile"].push_back(trimQualFile);
350                         outputTypes["qfile"].push_back(scrapQualFile); 
351                 }
352                 
353                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
354                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
355                 
356                 if (nameFile != "") {
357                         m->readNames(nameFile, nameMap);
358                         outputNames.push_back(trimNameFile);
359                         outputNames.push_back(scrapNameFile);
360                         outputTypes["name"].push_back(trimNameFile);
361                         outputTypes["name"].push_back(scrapNameFile); 
362                 }
363                 
364                 if (m->control_pressed) { return 0; }
365                 
366                 string outputGroupFileName;
367                 if(oligoFile != ""){
368                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
369                         if (createGroup) {
370                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
371                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
372                         }
373                 }
374        
375         //fills lines and qlines
376                 setLines(fastaFile, qFileName);
377                 
378         if(processors == 1){
379             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
380         }else{
381             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
382         }       
383                 
384                 
385                 if (m->control_pressed) {  return 0; }                  
386         
387                 if(allFiles){
388                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
389                         map<string, string>::iterator it;
390                         set<string> namesToRemove;
391                         for(int i=0;i<fastaFileNames.size();i++){
392                                 for(int j=0;j<fastaFileNames[0].size();j++){
393                                         if (fastaFileNames[i][j] != "") {
394                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
395                                                         if(m->isBlank(fastaFileNames[i][j])){
396                                                                 m->mothurRemove(fastaFileNames[i][j]);
397                                                                 namesToRemove.insert(fastaFileNames[i][j]);
398                                                         
399                                                                 if(qFileName != ""){
400                                                                         m->mothurRemove(qualFileNames[i][j]);
401                                                                         namesToRemove.insert(qualFileNames[i][j]);
402                                                                 }
403                                                                 
404                                                                 if(nameFile != ""){
405                                                                         m->mothurRemove(nameFileNames[i][j]);
406                                                                         namesToRemove.insert(nameFileNames[i][j]);
407                                                                 }
408                                                         }else{  
409                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
410                                                                 if (it == uniqueFastaNames.end()) {     
411                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
412                                                                 }       
413                                                         }
414                                                 }
415                                         }
416                                 }
417                         }
418                         
419                         //remove names for outputFileNames, just cleans up the output
420                         vector<string> outputNames2;
421                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
422                         outputNames = outputNames2;
423                         
424                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
425                                 ifstream in;
426                                 m->openInputFile(it->first, in);
427                                 
428                                 ofstream out;
429                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
430                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
431                                 m->openOutputFile(thisGroupName, out);
432                                 
433                                 while (!in.eof()){
434                                         if (m->control_pressed) { break; }
435                                         
436                                         Sequence currSeq(in); m->gobble(in);
437                                         out << currSeq.getName() << '\t' << it->second << endl;
438                                 }
439                                 in.close();
440                                 out.close();
441                         }
442                 }
443                 
444                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
445
446                 //output group counts
447                 m->mothurOutEndLine();
448                 int total = 0;
449                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
450                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
451                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
452                 }
453                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
454                 
455                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
456
457                 //set fasta file as new current fastafile
458                 string current = "";
459                 itTypes = outputTypes.find("fasta");
460                 if (itTypes != outputTypes.end()) {
461                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
462                 }
463                 
464                 itTypes = outputTypes.find("name");
465                 if (itTypes != outputTypes.end()) {
466                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
467                 }
468                 
469                 itTypes = outputTypes.find("qfile");
470                 if (itTypes != outputTypes.end()) {
471                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
472                 }
473                 
474                 itTypes = outputTypes.find("group");
475                 if (itTypes != outputTypes.end()) {
476                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
477                 }
478
479                 m->mothurOutEndLine();
480                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
481                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
482                 m->mothurOutEndLine();
483                 
484                 return 0;       
485                         
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "TrimSeqsCommand", "execute");
489                 exit(1);
490         }
491 }
492                 
493 /**************************************************************************************/
494
495 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
496                 
497         try {
498                 
499                 ofstream trimFASTAFile;
500                 m->openOutputFile(trimFileName, trimFASTAFile);
501                 
502                 ofstream scrapFASTAFile;
503                 m->openOutputFile(scrapFileName, scrapFASTAFile);
504                 
505                 ofstream trimQualFile;
506                 ofstream scrapQualFile;
507                 if(qFileName != ""){
508                         m->openOutputFile(trimQFileName, trimQualFile);
509                         m->openOutputFile(scrapQFileName, scrapQualFile);
510                 }
511                 
512                 ofstream trimNameFile;
513                 ofstream scrapNameFile;
514                 if(nameFile != ""){
515                         m->openOutputFile(trimNFileName, trimNameFile);
516                         m->openOutputFile(scrapNFileName, scrapNameFile);
517                 }
518                 
519                 
520                 ofstream outGroupsFile;
521                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
522                 if(allFiles){
523                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
524                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
525                                         if (fastaFileNames[i][j] != "") {
526                                                 ofstream temp;
527                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
528                                                 if(qFileName != ""){
529                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
530                                                 }
531                                                 
532                                                 if(nameFile != ""){
533                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
534                                                 }
535                                         }
536                                 }
537                         }
538                 }
539                 
540                 ifstream inFASTA;
541                 m->openInputFile(filename, inFASTA);
542                 inFASTA.seekg(line.start);
543                 
544                 ifstream qFile;
545                 if(qFileName != "")     {
546                         m->openInputFile(qFileName, qFile);
547                         qFile.seekg(qline.start);  
548                 }
549                 
550                 int count = 0;
551                 bool moreSeqs = 1;
552                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
553         
554                 while (moreSeqs) {
555                                 
556                         if (m->control_pressed) { 
557                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
558                                 if (createGroup) {       outGroupsFile.close();   }
559
560                                 if(qFileName != ""){
561                                         qFile.close();
562                                 }
563                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
564
565                                 return 0;
566                         }
567                         
568                         int success = 1;
569                         string trashCode = "";
570                         int currentSeqsDiffs = 0;
571
572                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
573                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
574                         QualityScores currQual;
575                         if(qFileName != ""){
576                                 currQual = QualityScores(qFile);  m->gobble(qFile);
577                         }
578                         
579                         string origSeq = currSeq.getUnaligned();
580                         if (origSeq != "") {
581                                 
582                                 int barcodeIndex = 0;
583                                 int primerIndex = 0;
584                                 
585                 if(numLinkers != 0){
586                                         success = trimOligos.stripLinker(currSeq, currQual);
587                                         if(success > ldiffs)            {       trashCode += 'k';       }
588                                         else{ currentSeqsDiffs += success;  }
589
590                                 }
591                 
592                                 if(barcodes.size() != 0){
593                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
594                                         if(success > bdiffs)            {       trashCode += 'b';       }
595                                         else{ currentSeqsDiffs += success;  }
596                                 }
597                                 
598                 if(numSpacers != 0){
599                                         success = trimOligos.stripSpacer(currSeq, currQual);
600                                         if(success > sdiffs)            {       trashCode += 's';       }
601                                         else{ currentSeqsDiffs += success;  }
602
603                                 }
604                 
605                                 if(numFPrimers != 0){
606                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
607                                         if(success > pdiffs)            {       trashCode += 'f';       }
608                                         else{ currentSeqsDiffs += success;  }
609                                 }
610                                 
611                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
612                                 
613                                 if(numRPrimers != 0){
614                                         success = trimOligos.stripReverse(currSeq, currQual);
615                                         if(!success)                            {       trashCode += 'r';       }
616                                 }
617
618                                 if(keepFirst != 0){
619                                         success = keepFirstTrim(currSeq, currQual);
620                                 }
621                                 
622                                 if(removeLast != 0){
623                                         success = removeLastTrim(currSeq, currQual);
624                                         if(!success)                            {       trashCode += 'l';       }
625                                 }
626
627                                 
628                                 if(qFileName != ""){
629                                         int origLength = currSeq.getNumBases();
630                                         
631                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
632                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
633                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
634                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
635                                         else                                            {       success = 1;                            }
636                                         
637                                         //you don't want to trim, if it fails above then scrap it
638                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
639                                         
640                                         if(!success)                            {       trashCode += 'q';       }
641                                 }                               
642                 
643                                 if(minLength > 0 || maxLength > 0){
644                                         success = cullLength(currSeq);
645                                         if(!success)                            {       trashCode += 'l';       }
646                                 }
647                                 if(maxHomoP > 0){
648                                         success = cullHomoP(currSeq);
649                                         if(!success)                            {       trashCode += 'h';       }
650                                 }
651                                 if(maxAmbig != -1){
652                                         success = cullAmbigs(currSeq);
653                                         if(!success)                            {       trashCode += 'n';       }
654                                 }
655                                 
656                                 if(flip){               // should go last                       
657                                         currSeq.reverseComplement();
658                                         if(qFileName != ""){
659                                                 currQual.flipQScores(); 
660                                         }
661                                 }
662                                 
663                                 if(trashCode.length() == 0){
664                                         currSeq.setAligned(currSeq.getUnaligned());
665                                         currSeq.printSequence(trimFASTAFile);
666                                         
667                                         if(qFileName != ""){
668                                                 currQual.printQScores(trimQualFile);
669                                         }
670                                         
671                                         if(nameFile != ""){
672                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
673                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
674                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
675                                         }
676                                         
677                                         if (createGroup) {
678                                                 if(barcodes.size() != 0){
679                                                         string thisGroup = barcodeNameVector[barcodeIndex];
680                                                         if (primers.size() != 0) { 
681                                                                 if (primerNameVector[primerIndex] != "") { 
682                                                                         if(thisGroup != "") {
683                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
684                                                                         }else {
685                                                                                 thisGroup = primerNameVector[primerIndex]; 
686                                                                         }
687                                                                 } 
688                                                         }
689                                                         
690                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
691                                                         
692                                                         if (nameFile != "") {
693                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
694                                                                 if (itName != nameMap.end()) { 
695                                                                         vector<string> thisSeqsNames; 
696                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
697                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
698                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
699                                                                         }
700                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
701                                                         }
702                                                         
703                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
704                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
705                                                         else { groupCounts[it->first]++; }
706                                                                 
707                                                 }
708                                         }
709                                         
710                                         if(allFiles){
711                                                 ofstream output;
712                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
713                                                 currSeq.printSequence(output);
714                                                 output.close();
715                                                 
716                                                 if(qFileName != ""){
717                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
718                                                         currQual.printQScores(output);
719                                                         output.close();                                                 
720                                                 }
721                                                 
722                                                 if(nameFile != ""){
723                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
724                                                         if (itName != nameMap.end()) { 
725                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
726                                                                 output << itName->first << '\t' << itName->second << endl; 
727                                                                 output.close();
728                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
729                                                 }
730                                         }
731                                 }
732                                 else{
733                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
734                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
735                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
736                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
737                                         }
738                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
739                                         currSeq.setUnaligned(origSeq);
740                                         currSeq.setAligned(origSeq);
741                                         currSeq.printSequence(scrapFASTAFile);
742                                         if(qFileName != ""){
743                                                 currQual.printQScores(scrapQualFile);
744                                         }
745                                 }
746                                 count++;
747                         }
748                         
749                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
750                                 unsigned long long pos = inFASTA.tellg();
751                                 if ((pos == -1) || (pos >= line.end)) { break; }
752                         
753                         #else
754                                 if (inFASTA.eof()) { break; }
755                         #endif
756                         
757                         //report progress
758                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
759                         
760                 }
761                 //report progress
762                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
763                 
764                 
765                 inFASTA.close();
766                 trimFASTAFile.close();
767                 scrapFASTAFile.close();
768                 if (createGroup) {       outGroupsFile.close();   }
769                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
770                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
771                 
772                 return count;
773         }
774         catch(exception& e) {
775                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
776                 exit(1);
777         }
778 }
779
780 /**************************************************************************************************/
781
782 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
783         try {
784         
785         int process = 1;
786                 int exitCommand = 1;
787                 processIDS.clear();
788                 
789 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
790                                 //loop through and create all the processes you want
791                 while (process != processors) {
792                         int pid = fork();
793                         
794                         if (pid > 0) {
795                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
796                                 process++;
797                         }else if (pid == 0){
798                                 
799                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
800                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
801                                 vector<vector<string> > tempNameFileNames = nameFileNames;
802
803                                 if(allFiles){
804                                         ofstream temp;
805
806                                         for(int i=0;i<tempFASTAFileNames.size();i++){
807                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
808                                                         if (tempFASTAFileNames[i][j] != "") {
809                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
810                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
811
812                                                                 if(qFileName != ""){
813                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
814                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
815                                                                 }
816                                                                 if(nameFile != ""){
817                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
818                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
819                                                                 }
820                                                         }
821                                                 }
822                                         }
823                                 }
824                                                         
825                                 driverCreateTrim(filename,
826                                                                  qFileName,
827                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
828                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
829                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
830                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
831                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
832                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
833                                                                  (groupFile + toString(getpid()) + ".temp"),
834                                                                  tempFASTAFileNames,
835                                                                  tempPrimerQualFileNames,
836                                                                  tempNameFileNames,
837                                                                  lines[process],
838                                                                  qLines[process]);
839                                 
840                                 //pass groupCounts to parent
841                                 if(createGroup){
842                                         ofstream out;
843                                         string tempFile = filename + toString(getpid()) + ".num.temp";
844                                         m->openOutputFile(tempFile, out);
845                                         
846                                         out << groupCounts.size() << endl;
847                                         
848                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
849                                                 out << it->first << '\t' << it->second << endl;
850                                         }
851                                         out.close();
852                                 }
853                                 exit(0);
854                         }else { 
855                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
856                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
857                                 exit(0);
858                         }
859                 }
860                 
861                 //parent do my part
862                 ofstream temp;
863                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
864                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
865                 if(qFileName != ""){
866                         m->openOutputFile(trimQualFileName, temp);              temp.close();
867                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
868                 }
869                 if (nameFile != "") {
870                         m->openOutputFile(trimNameFileName, temp);              temp.close();
871                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
872                 }
873
874                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
875                 
876                 //force parent to wait until all the processes are done
877                 for (int i=0;i<processIDS.size();i++) { 
878                         int temp = processIDS[i];
879                         wait(&temp);
880                 }
881 #else
882         //////////////////////////////////////////////////////////////////////////////////////////////////////
883                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
884                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
885                 //////////////////////////////////////////////////////////////////////////////////////////////////////
886                 
887                 vector<trimData*> pDataArray; 
888                 DWORD   dwThreadIdArray[processors-1];
889                 HANDLE  hThreadArray[processors-1]; 
890                 
891                 //Create processor worker threads.
892                 for( int i=0; i<processors-1; i++){
893                         
894             string extension = "";
895                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
896             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
897             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
898             vector<vector<string> > tempNameFileNames = nameFileNames;
899             
900             if(allFiles){
901                 ofstream temp;
902                 
903                 for(int i=0;i<tempFASTAFileNames.size();i++){
904                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
905                         if (tempFASTAFileNames[i][j] != "") {
906                             tempFASTAFileNames[i][j] += extension;
907                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
908                             
909                             if(qFileName != ""){
910                                 tempPrimerQualFileNames[i][j] += extension;
911                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
912                             }
913                             if(nameFile != ""){
914                                 tempNameFileNames[i][j] += extension;
915                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
916                             }
917                         }
918                     }
919                 }
920             }
921
922             
923                         trimData* tempTrim = new trimData(filename,
924                                               qFileName, nameFile,
925                                               (trimFASTAFileName+extension),
926                                               (scrapFASTAFileName+extension),
927                                               (trimQualFileName+extension),
928                                               (scrapQualFileName+extension),
929                                               (trimNameFileName+extension),
930                                               (scrapNameFileName+extension),
931                                               (groupFile+extension),
932                                               tempFASTAFileNames,
933                                               tempPrimerQualFileNames,
934                                               tempNameFileNames,
935                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
936                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
937                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
938                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
939                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
940                         pDataArray.push_back(tempTrim);
941             
942                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
943                 }
944         
945         //parent do my part
946                 ofstream temp;
947                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
948                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
949                 if(qFileName != ""){
950                         m->openOutputFile(trimQualFileName, temp);              temp.close();
951                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
952                 }
953                 if (nameFile != "") {
954                         m->openOutputFile(trimNameFileName, temp);              temp.close();
955                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
956                 }
957         
958                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
959         processIDS.push_back(processors-1);
960
961         
962                 //Wait until all threads have terminated.
963                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
964                 
965                 //Close all thread handles and free memory allocations.
966                 for(int i=0; i < pDataArray.size(); i++){
967                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
968                 map<string, int>::iterator it2 = groupCounts.find(it->first);
969                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
970                 else { groupCounts[it->first] += it->second; }
971             }
972             CloseHandle(hThreadArray[i]);
973                         delete pDataArray[i];
974                 }
975         
976 #endif          
977         
978         
979         //append files
980                 for(int i=0;i<processIDS.size();i++){
981                         
982                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
983                         
984                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
985                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
986                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
987                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
988                         
989                         if(qFileName != ""){
990                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
991                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
992                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
993                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
994                         }
995                         
996                         if(nameFile != ""){
997                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
998                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
999                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1000                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1001                         }
1002                         
1003                         if(createGroup){
1004                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1005                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1006                         }
1007                         
1008                         
1009                         if(allFiles){
1010                                 for(int j=0;j<fastaFileNames.size();j++){
1011                                         for(int k=0;k<fastaFileNames[j].size();k++){
1012                                                 if (fastaFileNames[j][k] != "") {
1013                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1014                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1015                                                         
1016                                                         if(qFileName != ""){
1017                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1018                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1019                                                         }
1020                                                         
1021                                                         if(nameFile != ""){
1022                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1023                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1024                                                         }
1025                                                 }
1026                                         }
1027                                 }
1028                         }
1029                         
1030             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1031                         if(createGroup){
1032                                 ifstream in;
1033                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1034                                 m->openInputFile(tempFile, in);
1035                                 int tempNum;
1036                                 string group;
1037                                 
1038                                 in >> tempNum; m->gobble(in);
1039                                 
1040                                 if (tempNum != 0) {
1041                                         while (!in.eof()) { 
1042                                                 in >> group >> tempNum; m->gobble(in);
1043                         
1044                                                 map<string, int>::iterator it = groupCounts.find(group);
1045                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1046                                                 else { groupCounts[it->first] += tempNum; }
1047                                         }
1048                                 }
1049                                 in.close(); m->mothurRemove(tempFile);
1050                         }
1051             #endif
1052                 }
1053
1054         return exitCommand;
1055         }
1056         catch(exception& e) {
1057                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1058                 exit(1);
1059         }
1060 }
1061
1062 /**************************************************************************************************/
1063
1064 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1065         try {
1066         
1067         vector<unsigned long long> fastaFilePos;
1068                 vector<unsigned long long> qfileFilePos;
1069                 
1070                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1071                 //set file positions for fasta file
1072                 fastaFilePos = m->divideFile(filename, processors);
1073                 
1074                 //get name of first sequence in each chunk
1075                 map<string, int> firstSeqNames;
1076                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1077                         ifstream in;
1078                         m->openInputFile(filename, in);
1079                         in.seekg(fastaFilePos[i]);
1080                 
1081                         Sequence temp(in); 
1082                         firstSeqNames[temp.getName()] = i;
1083                 
1084                         in.close();
1085                 }
1086                 
1087                 if(qfilename != "")     {
1088             //seach for filePos of each first name in the qfile and save in qfileFilePos
1089             ifstream inQual;
1090             m->openInputFile(qfilename, inQual);
1091             
1092             string input;
1093             while(!inQual.eof()){       
1094                 input = m->getline(inQual);
1095                 
1096                 if (input.length() != 0) {
1097                     if(input[0] == '>'){ //this is a sequence name line
1098                         istringstream nameStream(input);
1099                         
1100                         string sname = "";  nameStream >> sname;
1101                         sname = sname.substr(1);
1102                         
1103                         map<string, int>::iterator it = firstSeqNames.find(sname);
1104                         
1105                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1106                             unsigned long long pos = inQual.tellg(); 
1107                             qfileFilePos.push_back(pos - input.length() - 1);   
1108                             firstSeqNames.erase(it);
1109                         }
1110                     }
1111                 }
1112                 
1113                 if (firstSeqNames.size() == 0) { break; }
1114             }
1115             inQual.close();
1116             
1117             
1118             if (firstSeqNames.size() != 0) { 
1119                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1120                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1121                 }
1122                 qFileName = "";
1123                 return processors;
1124             }
1125             
1126             //get last file position of qfile
1127             FILE * pFile;
1128             unsigned long long size;
1129             
1130             //get num bytes in file
1131             pFile = fopen (qfilename.c_str(),"rb");
1132             if (pFile==NULL) perror ("Error opening file");
1133             else{
1134                 fseek (pFile, 0, SEEK_END);
1135                 size=ftell (pFile);
1136                 fclose (pFile);
1137             }
1138             
1139             qfileFilePos.push_back(size);
1140         }
1141         
1142         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1143                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1144                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1145                 }       
1146                 if(qfilename == "")     {       qLines = lines; } //files with duds
1147                 
1148                 return processors;
1149                 
1150                 #else
1151             
1152         if (processors == 1) { //save time
1153                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1154                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1155             lines.push_back(linePair(0, 1000));
1156             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1157         }else{
1158             int numFastaSeqs = 0;
1159             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1160             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1161         
1162             if (qfilename != "") { 
1163                 int numQualSeqs = 0;
1164                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1165                 
1166                 if (numFastaSeqs != numQualSeqs) {
1167                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1168                 }
1169             }
1170         
1171             //figure out how many sequences you have to process
1172             int numSeqsPerProcessor = numFastaSeqs / processors;
1173             for (int i = 0; i < processors; i++) {
1174                 int startIndex =  i * numSeqsPerProcessor;
1175                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1176                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1177                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1178                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1179             }
1180         
1181             if(qfilename == "") {       qLines = lines; } //files with duds
1182         }
1183                         return 1;
1184                 
1185                 #endif
1186         }
1187         catch(exception& e) {
1188                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1189                 exit(1);
1190         }
1191 }
1192
1193 //***************************************************************************************************************
1194
1195 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1196         try {
1197                 ifstream inOligos;
1198                 m->openInputFile(oligoFile, inOligos);
1199                 
1200                 ofstream test;
1201                 
1202                 string type, oligo, group;
1203
1204                 int indexPrimer = 0;
1205                 int indexBarcode = 0;
1206                 
1207                 while(!inOligos.eof()){
1208
1209                         inOligos >> type; 
1210                                         
1211                         if(type[0] == '#'){
1212                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1213                                 m->gobble(inOligos);
1214                         }
1215                         else{
1216                                 m->gobble(inOligos);
1217                                 //make type case insensitive
1218                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1219                                 
1220                                 inOligos >> oligo;
1221                                 
1222                                 for(int i=0;i<oligo.length();i++){
1223                                         oligo[i] = toupper(oligo[i]);
1224                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1225                                 }
1226                                 
1227                                 if(type == "FORWARD"){
1228                                         group = "";
1229                                         
1230                                         // get rest of line in case there is a primer name
1231                                         while (!inOligos.eof()) {       
1232                                                 char c = inOligos.get(); 
1233                                                 if (c == 10 || c == 13){        break;  }
1234                                                 else if (c == 32 || c == 9){;} //space or tab
1235                                                 else {  group += c;  }
1236                                         } 
1237                                         
1238                                         //check for repeat barcodes
1239                                         map<string, int>::iterator itPrime = primers.find(oligo);
1240                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1241                                         
1242                                         primers[oligo]=indexPrimer; indexPrimer++;              
1243                                         primerNameVector.push_back(group);
1244                                 }
1245                                 else if(type == "REVERSE"){
1246                                         //Sequence oligoRC("reverse", oligo);
1247                                         //oligoRC.reverseComplement();
1248                     string oligoRC = reverseOligo(oligo);
1249                                         revPrimer.push_back(oligoRC);
1250                                 }
1251                                 else if(type == "BARCODE"){
1252                                         inOligos >> group;
1253                                         
1254                                         //check for repeat barcodes
1255                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1256                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1257                                                 
1258                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1259                                         barcodeNameVector.push_back(group);
1260                                 }else if(type == "LINKER"){
1261                                         linker.push_back(oligo);
1262                                 }else if(type == "SPACER"){
1263                                         spacer.push_back(oligo);
1264                                 }
1265                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1266                         }
1267                         m->gobble(inOligos);
1268                 }       
1269                 inOligos.close();
1270                 
1271                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1272                 
1273                 //add in potential combos
1274                 if(barcodeNameVector.size() == 0){
1275                         barcodes[""] = 0;
1276                         barcodeNameVector.push_back("");                        
1277                 }
1278                 
1279                 if(primerNameVector.size() == 0){
1280                         primers[""] = 0;
1281                         primerNameVector.push_back("");                 
1282                 }
1283                 
1284                 fastaFileNames.resize(barcodeNameVector.size());
1285                 for(int i=0;i<fastaFileNames.size();i++){
1286                         fastaFileNames[i].assign(primerNameVector.size(), "");
1287                 }
1288                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1289                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1290                 
1291                 if(allFiles){
1292                         set<string> uniqueNames; //used to cleanup outputFileNames
1293                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1294                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1295                                         
1296                                         string primerName = primerNameVector[itPrimer->second];
1297                                         string barcodeName = barcodeNameVector[itBar->second];
1298                                         
1299                                         string comboGroupName = "";
1300                                         string fastaFileName = "";
1301                                         string qualFileName = "";
1302                                         string nameFileName = "";
1303                                         
1304                                         if(primerName == ""){
1305                                                 comboGroupName = barcodeNameVector[itBar->second];
1306                                         }
1307                                         else{
1308                                                 if(barcodeName == ""){
1309                                                         comboGroupName = primerNameVector[itPrimer->second];
1310                                                 }
1311                                                 else{
1312                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1313                                                 }
1314                                         }
1315                                         
1316                                         
1317                                         ofstream temp;
1318                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1319                                         if (uniqueNames.count(fastaFileName) == 0) {
1320                                                 outputNames.push_back(fastaFileName);
1321                                                 outputTypes["fasta"].push_back(fastaFileName);
1322                                                 uniqueNames.insert(fastaFileName);
1323                                         }
1324                                         
1325                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1326                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1327                                         
1328                                         if(qFileName != ""){
1329                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1330                                                 if (uniqueNames.count(qualFileName) == 0) {
1331                                                         outputNames.push_back(qualFileName);
1332                                                         outputTypes["qfile"].push_back(qualFileName);
1333                                                 }
1334                                                 
1335                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1336                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1337                                         }
1338                                         
1339                                         if(nameFile != ""){
1340                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1341                                                 if (uniqueNames.count(nameFileName) == 0) {
1342                                                         outputNames.push_back(nameFileName);
1343                                                         outputTypes["name"].push_back(nameFileName);
1344                                                 }
1345                                                 
1346                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1347                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1348                                         }
1349                                         
1350                                 }
1351                         }
1352                 }
1353                 numFPrimers = primers.size();
1354                 numRPrimers = revPrimer.size();
1355         numLinkers = linker.size();
1356         numSpacers = spacer.size();
1357                 
1358                 bool allBlank = true;
1359                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1360                         if (barcodeNameVector[i] != "") {
1361                                 allBlank = false;
1362                                 break;
1363                         }
1364                 }
1365                 for (int i = 0; i < primerNameVector.size(); i++) {
1366                         if (primerNameVector[i] != "") {
1367                                 allBlank = false;
1368                                 break;
1369                         }
1370                 }
1371                 
1372                 if (allBlank) {
1373                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1374                         allFiles = false;
1375                         return false;
1376                 }
1377                 
1378                 return true;
1379                 
1380         }
1381         catch(exception& e) {
1382                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1383                 exit(1);
1384         }
1385 }
1386 //***************************************************************************************************************
1387
1388 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1389         try {
1390                 bool success = 1;
1391                 if(qscores.getName() != ""){
1392                         qscores.trimQScores(-1, keepFirst);
1393                 }
1394                 sequence.trim(keepFirst);
1395                 return success;
1396         }
1397         catch(exception& e) {
1398                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1399                 exit(1);
1400         }
1401         
1402 }       
1403
1404 //***************************************************************************************************************
1405
1406 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1407         try {
1408                 bool success = 0;
1409                 
1410                 int length = sequence.getNumBases() - removeLast;
1411                 
1412                 if(length > 0){
1413                         if(qscores.getName() != ""){
1414                                 qscores.trimQScores(-1, length);
1415                         }
1416                         sequence.trim(length);
1417                         success = 1;
1418                 }
1419                 else{
1420                         success = 0;
1421                 }
1422
1423                 return success;
1424         }
1425         catch(exception& e) {
1426                 m->errorOut(e, "removeLastTrim", "countDiffs");
1427                 exit(1);
1428         }
1429         
1430 }       
1431
1432 //***************************************************************************************************************
1433
1434 bool TrimSeqsCommand::cullLength(Sequence& seq){
1435         try {
1436         
1437                 int length = seq.getNumBases();
1438                 bool success = 0;       //guilty until proven innocent
1439                 
1440                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1441                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1442                 else                                                                                            {       success = 0;    }
1443                 
1444                 return success;
1445         
1446         }
1447         catch(exception& e) {
1448                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1449                 exit(1);
1450         }
1451         
1452 }
1453
1454 //***************************************************************************************************************
1455
1456 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1457         try {
1458                 int longHomoP = seq.getLongHomoPolymer();
1459                 bool success = 0;       //guilty until proven innocent
1460                 
1461                 if(longHomoP <= maxHomoP){      success = 1;    }
1462                 else                                    {       success = 0;    }
1463                 
1464                 return success;
1465         }
1466         catch(exception& e) {
1467                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1468                 exit(1);
1469         }
1470         
1471 }
1472 //********************************************************************/
1473 string TrimSeqsCommand::reverseOligo(string oligo){
1474         try {
1475         string reverse = "";
1476         
1477         for(int i=oligo.length()-1;i>=0;i--){
1478             
1479             if(oligo[i] == 'A')         {       reverse += 'T'; }
1480             else if(oligo[i] == 'T'){   reverse += 'A'; }
1481             else if(oligo[i] == 'U'){   reverse += 'A'; }
1482             
1483             else if(oligo[i] == 'G'){   reverse += 'C'; }
1484             else if(oligo[i] == 'C'){   reverse += 'G'; }
1485             
1486             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1487             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1488             
1489             else if(oligo[i] == 'M'){   reverse += 'K'; }
1490             else if(oligo[i] == 'K'){   reverse += 'M'; }
1491             
1492             else if(oligo[i] == 'W'){   reverse += 'W'; }
1493             else if(oligo[i] == 'S'){   reverse += 'S'; }
1494             
1495             else if(oligo[i] == 'B'){   reverse += 'V'; }
1496             else if(oligo[i] == 'V'){   reverse += 'B'; }
1497             
1498             else if(oligo[i] == 'D'){   reverse += 'H'; }
1499             else if(oligo[i] == 'H'){   reverse += 'D'; }
1500             
1501             else                                                {       reverse += 'N'; }
1502         }
1503         
1504         
1505         return reverse;
1506     }
1507         catch(exception& e) {
1508                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1509                 exit(1);
1510         }
1511 }
1512
1513 //***************************************************************************************************************
1514
1515 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1516         try {
1517                 int numNs = seq.getAmbigBases();
1518                 bool success = 0;       //guilty until proven innocent
1519                 
1520                 if(numNs <= maxAmbig)   {       success = 1;    }
1521                 else                                    {       success = 0;    }
1522                 
1523                 return success;
1524         }
1525         catch(exception& e) {
1526                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1527                 exit(1);
1528         }
1529         
1530 }
1531 //***************************************************************************************************************