]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
paralellized trim.seqs for windows. fixed bug in trim.flows that stopped fasta data...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332                 createGroup = false;
333                 vector<vector<string> > fastaFileNames;
334                 vector<vector<string> > qualFileNames;
335                 vector<vector<string> > nameFileNames;
336                 
337                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
338                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
339                 
340                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
341                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
342                 
343                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
344                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
345                 
346                 if (qFileName != "") {
347                         outputNames.push_back(trimQualFile);
348                         outputNames.push_back(scrapQualFile);
349                         outputTypes["qfile"].push_back(trimQualFile);
350                         outputTypes["qfile"].push_back(scrapQualFile); 
351                 }
352                 
353                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
354                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
355                 
356                 if (nameFile != "") {
357                         m->readNames(nameFile, nameMap);
358                         outputNames.push_back(trimNameFile);
359                         outputNames.push_back(scrapNameFile);
360                         outputTypes["name"].push_back(trimNameFile);
361                         outputTypes["name"].push_back(scrapNameFile); 
362                 }
363                 
364                 if (m->control_pressed) { return 0; }
365                 
366                 string outputGroupFileName;
367                 if(oligoFile != ""){
368                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
369                         if (createGroup) {
370                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
371                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
372                         }
373                 }
374                 
375         //fills lines and qlines
376                 setLines(fastaFile, qFileName);
377                 
378                 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
379                                 if(processors == 1){
380                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
381                                 }else{
382                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
383                                 }       
384                 //#else
385                         //      driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
386                 //#endif
387                 
388                 if (m->control_pressed) {  return 0; }                  
389         
390                 if(allFiles){
391                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
392                         map<string, string>::iterator it;
393                         set<string> namesToRemove;
394                         for(int i=0;i<fastaFileNames.size();i++){
395                                 for(int j=0;j<fastaFileNames[0].size();j++){
396                                         if (fastaFileNames[i][j] != "") {
397                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
398                                                         if(m->isBlank(fastaFileNames[i][j])){
399                                                                 m->mothurRemove(fastaFileNames[i][j]);
400                                                                 namesToRemove.insert(fastaFileNames[i][j]);
401                                                         
402                                                                 if(qFileName != ""){
403                                                                         m->mothurRemove(qualFileNames[i][j]);
404                                                                         namesToRemove.insert(qualFileNames[i][j]);
405                                                                 }
406                                                                 
407                                                                 if(nameFile != ""){
408                                                                         m->mothurRemove(nameFileNames[i][j]);
409                                                                         namesToRemove.insert(nameFileNames[i][j]);
410                                                                 }
411                                                         }else{  
412                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
413                                                                 if (it == uniqueFastaNames.end()) {     
414                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
415                                                                 }       
416                                                         }
417                                                 }
418                                         }
419                                 }
420                         }
421                         
422                         //remove names for outputFileNames, just cleans up the output
423                         vector<string> outputNames2;
424                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
425                         outputNames = outputNames2;
426                         
427                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
428                                 ifstream in;
429                                 m->openInputFile(it->first, in);
430                                 
431                                 ofstream out;
432                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
433                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
434                                 m->openOutputFile(thisGroupName, out);
435                                 
436                                 while (!in.eof()){
437                                         if (m->control_pressed) { break; }
438                                         
439                                         Sequence currSeq(in); m->gobble(in);
440                                         out << currSeq.getName() << '\t' << it->second << endl;
441                                 }
442                                 in.close();
443                                 out.close();
444                         }
445                 }
446                 
447                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
448
449                 //output group counts
450                 m->mothurOutEndLine();
451                 int total = 0;
452                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
453                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
454                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
455                 }
456                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
457                 
458                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
459
460                 //set fasta file as new current fastafile
461                 string current = "";
462                 itTypes = outputTypes.find("fasta");
463                 if (itTypes != outputTypes.end()) {
464                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
465                 }
466                 
467                 itTypes = outputTypes.find("name");
468                 if (itTypes != outputTypes.end()) {
469                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
470                 }
471                 
472                 itTypes = outputTypes.find("qfile");
473                 if (itTypes != outputTypes.end()) {
474                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
475                 }
476                 
477                 itTypes = outputTypes.find("group");
478                 if (itTypes != outputTypes.end()) {
479                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
480                 }
481
482                 m->mothurOutEndLine();
483                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
484                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
485                 m->mothurOutEndLine();
486                 
487                 return 0;       
488                         
489         }
490         catch(exception& e) {
491                 m->errorOut(e, "TrimSeqsCommand", "execute");
492                 exit(1);
493         }
494 }
495                 
496 /**************************************************************************************/
497
498 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
499                 
500         try {
501                 
502                 ofstream trimFASTAFile;
503                 m->openOutputFile(trimFileName, trimFASTAFile);
504                 
505                 ofstream scrapFASTAFile;
506                 m->openOutputFile(scrapFileName, scrapFASTAFile);
507                 
508                 ofstream trimQualFile;
509                 ofstream scrapQualFile;
510                 if(qFileName != ""){
511                         m->openOutputFile(trimQFileName, trimQualFile);
512                         m->openOutputFile(scrapQFileName, scrapQualFile);
513                 }
514                 
515                 ofstream trimNameFile;
516                 ofstream scrapNameFile;
517                 if(nameFile != ""){
518                         m->openOutputFile(trimNFileName, trimNameFile);
519                         m->openOutputFile(scrapNFileName, scrapNameFile);
520                 }
521                 
522                 
523                 ofstream outGroupsFile;
524                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
525                 if(allFiles){
526                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
527                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
528                                         if (fastaFileNames[i][j] != "") {
529                                                 ofstream temp;
530                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
531                                                 if(qFileName != ""){
532                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
533                                                 }
534                                                 
535                                                 if(nameFile != ""){
536                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
537                                                 }
538                                         }
539                                 }
540                         }
541                 }
542                 
543                 ifstream inFASTA;
544                 m->openInputFile(filename, inFASTA);
545                 inFASTA.seekg(line.start);
546                 
547                 ifstream qFile;
548                 if(qFileName != "")     {
549                         m->openInputFile(qFileName, qFile);
550                         qFile.seekg(qline.start);  
551                 }
552                 
553                 int count = 0;
554                 bool moreSeqs = 1;
555                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
556         
557                 while (moreSeqs) {
558                                 
559                         if (m->control_pressed) { 
560                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
561                                 if (createGroup) {       outGroupsFile.close();   }
562
563                                 if(qFileName != ""){
564                                         qFile.close();
565                                 }
566                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
567
568                                 return 0;
569                         }
570                         
571                         int success = 1;
572                         string trashCode = "";
573                         int currentSeqsDiffs = 0;
574
575                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
576                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
577                         QualityScores currQual;
578                         if(qFileName != ""){
579                                 currQual = QualityScores(qFile);  m->gobble(qFile);
580                         }
581                         
582                         string origSeq = currSeq.getUnaligned();
583                         if (origSeq != "") {
584                                 
585                                 int barcodeIndex = 0;
586                                 int primerIndex = 0;
587                                 
588                 if(numLinkers != 0){
589                                         success = trimOligos.stripLinker(currSeq, currQual);
590                                         if(success > ldiffs)            {       trashCode += 'k';       }
591                                         else{ currentSeqsDiffs += success;  }
592
593                                 }
594                 
595                                 if(barcodes.size() != 0){
596                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
597                                         if(success > bdiffs)            {       trashCode += 'b';       }
598                                         else{ currentSeqsDiffs += success;  }
599                                 }
600                                 
601                 if(numSpacers != 0){
602                                         success = trimOligos.stripSpacer(currSeq, currQual);
603                                         if(success > sdiffs)            {       trashCode += 's';       }
604                                         else{ currentSeqsDiffs += success;  }
605
606                                 }
607                 
608                                 if(numFPrimers != 0){
609                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
610                                         if(success > pdiffs)            {       trashCode += 'f';       }
611                                         else{ currentSeqsDiffs += success;  }
612                                 }
613                                 
614                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
615                                 
616                                 if(numRPrimers != 0){
617                                         success = trimOligos.stripReverse(currSeq, currQual);
618                                         if(!success)                            {       trashCode += 'r';       }
619                                 }
620
621                                 if(keepFirst != 0){
622                                         success = keepFirstTrim(currSeq, currQual);
623                                 }
624                                 
625                                 if(removeLast != 0){
626                                         success = removeLastTrim(currSeq, currQual);
627                                         if(!success)                            {       trashCode += 'l';       }
628                                 }
629
630                                 
631                                 if(qFileName != ""){
632                                         int origLength = currSeq.getNumBases();
633                                         
634                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
635                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
636                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
637                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
638                                         else                                            {       success = 1;                            }
639                                         
640                                         //you don't want to trim, if it fails above then scrap it
641                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
642                                         
643                                         if(!success)                            {       trashCode += 'q';       }
644                                 }                               
645                 
646                                 if(minLength > 0 || maxLength > 0){
647                                         success = cullLength(currSeq);
648                                         if(!success)                            {       trashCode += 'l';       }
649                                 }
650                                 if(maxHomoP > 0){
651                                         success = cullHomoP(currSeq);
652                                         if(!success)                            {       trashCode += 'h';       }
653                                 }
654                                 if(maxAmbig != -1){
655                                         success = cullAmbigs(currSeq);
656                                         if(!success)                            {       trashCode += 'n';       }
657                                 }
658                                 
659                                 if(flip){               // should go last                       
660                                         currSeq.reverseComplement();
661                                         if(qFileName != ""){
662                                                 currQual.flipQScores(); 
663                                         }
664                                 }
665                                 
666                                 if(trashCode.length() == 0){
667                                         currSeq.setAligned(currSeq.getUnaligned());
668                                         currSeq.printSequence(trimFASTAFile);
669                                         
670                                         if(qFileName != ""){
671                                                 currQual.printQScores(trimQualFile);
672                                         }
673                                         
674                                         if(nameFile != ""){
675                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
676                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
677                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
678                                         }
679                                         
680                                         if (createGroup) {
681                                                 if(barcodes.size() != 0){
682                                                         string thisGroup = barcodeNameVector[barcodeIndex];
683                                                         if (primers.size() != 0) { 
684                                                                 if (primerNameVector[primerIndex] != "") { 
685                                                                         if(thisGroup != "") {
686                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
687                                                                         }else {
688                                                                                 thisGroup = primerNameVector[primerIndex]; 
689                                                                         }
690                                                                 } 
691                                                         }
692                                                         
693                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
694                                                         
695                                                         if (nameFile != "") {
696                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
697                                                                 if (itName != nameMap.end()) { 
698                                                                         vector<string> thisSeqsNames; 
699                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
700                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
701                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
702                                                                         }
703                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
704                                                         }
705                                                         
706                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
707                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
708                                                         else { groupCounts[it->first]++; }
709                                                                 
710                                                 }
711                                         }
712                                         
713                                         if(allFiles){
714                                                 ofstream output;
715                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
716                                                 currSeq.printSequence(output);
717                                                 output.close();
718                                                 
719                                                 if(qFileName != ""){
720                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
721                                                         currQual.printQScores(output);
722                                                         output.close();                                                 
723                                                 }
724                                                 
725                                                 if(nameFile != ""){
726                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
727                                                         if (itName != nameMap.end()) { 
728                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
729                                                                 output << itName->first << '\t' << itName->second << endl; 
730                                                                 output.close();
731                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
732                                                 }
733                                         }
734                                 }
735                                 else{
736                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
737                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
738                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
739                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
740                                         }
741                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
742                                         currSeq.setUnaligned(origSeq);
743                                         currSeq.setAligned(origSeq);
744                                         currSeq.printSequence(scrapFASTAFile);
745                                         if(qFileName != ""){
746                                                 currQual.printQScores(scrapQualFile);
747                                         }
748                                 }
749                                 count++;
750                         }
751                         
752                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
753                                 unsigned long long pos = inFASTA.tellg();
754                                 if ((pos == -1) || (pos >= line.end)) { break; }
755                         
756                         #else
757                                 if (inFASTA.eof()) { break; }
758                         #endif
759                         
760                         //report progress
761                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
762                         
763                 }
764                 //report progress
765                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
766                 
767                 
768                 inFASTA.close();
769                 trimFASTAFile.close();
770                 scrapFASTAFile.close();
771                 if (createGroup) {       outGroupsFile.close();   }
772                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
773                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
774                 
775                 return count;
776         }
777         catch(exception& e) {
778                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
779                 exit(1);
780         }
781 }
782
783 /**************************************************************************************************/
784
785 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
786         try {
787         
788         int process = 1;
789                 int exitCommand = 1;
790                 processIDS.clear();
791                 
792 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
793                                 //loop through and create all the processes you want
794                 while (process != processors) {
795                         int pid = fork();
796                         
797                         if (pid > 0) {
798                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
799                                 process++;
800                         }else if (pid == 0){
801                                 
802                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
803                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
804                                 vector<vector<string> > tempNameFileNames = nameFileNames;
805
806                                 if(allFiles){
807                                         ofstream temp;
808
809                                         for(int i=0;i<tempFASTAFileNames.size();i++){
810                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
811                                                         if (tempFASTAFileNames[i][j] != "") {
812                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
813                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
814
815                                                                 if(qFileName != ""){
816                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
817                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
818                                                                 }
819                                                                 if(nameFile != ""){
820                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
821                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
822                                                                 }
823                                                         }
824                                                 }
825                                         }
826                                 }
827                                                         
828                                 driverCreateTrim(filename,
829                                                                  qFileName,
830                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
831                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
832                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
833                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
834                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
835                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
836                                                                  (groupFile + toString(getpid()) + ".temp"),
837                                                                  tempFASTAFileNames,
838                                                                  tempPrimerQualFileNames,
839                                                                  tempNameFileNames,
840                                                                  lines[process],
841                                                                  qLines[process]);
842                                 
843                                 //pass groupCounts to parent
844                                 if(createGroup){
845                                         ofstream out;
846                                         string tempFile = filename + toString(getpid()) + ".num.temp";
847                                         m->openOutputFile(tempFile, out);
848                                         
849                                         out << groupCounts.size() << endl;
850                                         
851                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
852                                                 out << it->first << '\t' << it->second << endl;
853                                         }
854                                         out.close();
855                                 }
856                                 exit(0);
857                         }else { 
858                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
859                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
860                                 exit(0);
861                         }
862                 }
863                 
864                 //parent do my part
865                 ofstream temp;
866                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
867                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
868                 if(qFileName != ""){
869                         m->openOutputFile(trimQualFileName, temp);              temp.close();
870                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
871                 }
872                 if (nameFile != "") {
873                         m->openOutputFile(trimNameFileName, temp);              temp.close();
874                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
875                 }
876
877                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
878                 
879                 //force parent to wait until all the processes are done
880                 for (int i=0;i<processIDS.size();i++) { 
881                         int temp = processIDS[i];
882                         wait(&temp);
883                 }
884 #else
885         //////////////////////////////////////////////////////////////////////////////////////////////////////
886                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
887                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
888                 //////////////////////////////////////////////////////////////////////////////////////////////////////
889                 
890                 vector<trimData*> pDataArray; 
891                 DWORD   dwThreadIdArray[processors-1];
892                 HANDLE  hThreadArray[processors-1]; 
893                 
894                 //Create processor worker threads.
895                 for( int i=0; i<processors-1; i++){
896                         
897             string extension = "";
898                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
899             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
900             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
901             vector<vector<string> > tempNameFileNames = nameFileNames;
902             
903             if(allFiles){
904                 ofstream temp;
905                 
906                 for(int i=0;i<tempFASTAFileNames.size();i++){
907                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
908                         if (tempFASTAFileNames[i][j] != "") {
909                             tempFASTAFileNames[i][j] += extension;
910                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
911                             
912                             if(qFileName != ""){
913                                 tempPrimerQualFileNames[i][j] += extension;
914                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
915                             }
916                             if(nameFile != ""){
917                                 tempNameFileNames[i][j] += extension;
918                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
919                             }
920                         }
921                     }
922                 }
923             }
924
925             
926                         trimData* tempTrim = new trimData(filename,
927                                               qFileName, nameFile,
928                                               (trimFASTAFileName+extension),
929                                               (scrapFASTAFileName+extension),
930                                               (trimQualFileName+extension),
931                                               (scrapQualFileName+extension),
932                                               (trimNameFileName+extension),
933                                               (scrapNameFileName+extension),
934                                               (groupFile+extension),
935                                               tempFASTAFileNames,
936                                               tempPrimerQualFileNames,
937                                               tempNameFileNames,
938                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
939                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
940                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
941                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
942                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
943                         pDataArray.push_back(tempTrim);
944             
945                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
946                 }
947         
948         //parent do my part
949                 ofstream temp;
950                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
951                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
952                 if(qFileName != ""){
953                         m->openOutputFile(trimQualFileName, temp);              temp.close();
954                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
955                 }
956                 if (nameFile != "") {
957                         m->openOutputFile(trimNameFileName, temp);              temp.close();
958                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
959                 }
960         
961                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
962         processIDS.push_back(processors-1);
963
964         
965                 //Wait until all threads have terminated.
966                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
967                 
968                 //Close all thread handles and free memory allocations.
969                 for(int i=0; i < pDataArray.size(); i++){
970                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
971                 map<string, int>::iterator it2 = groupCounts.find(it->first);
972                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
973                 else { groupCounts[it->first] += it->second; }
974             }
975             CloseHandle(hThreadArray[i]);
976                         delete pDataArray[i];
977                 }
978         
979 #endif          
980         
981         
982         //append files
983                 for(int i=0;i<processIDS.size();i++){
984                         
985                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
986                         
987                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
988                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
989                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
990                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
991                         
992                         if(qFileName != ""){
993                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
994                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
995                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
996                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
997                         }
998                         
999                         if(nameFile != ""){
1000                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1001                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1002                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1003                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1004                         }
1005                         
1006                         if(createGroup){
1007                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1008                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1009                         }
1010                         
1011                         
1012                         if(allFiles){
1013                                 for(int j=0;j<fastaFileNames.size();j++){
1014                                         for(int k=0;k<fastaFileNames[j].size();k++){
1015                                                 if (fastaFileNames[j][k] != "") {
1016                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1017                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1018                                                         
1019                                                         if(qFileName != ""){
1020                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1021                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1022                                                         }
1023                                                         
1024                                                         if(nameFile != ""){
1025                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1026                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1027                                                         }
1028                                                 }
1029                                         }
1030                                 }
1031                         }
1032                         
1033             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1034                         if(createGroup){
1035                                 ifstream in;
1036                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1037                                 m->openInputFile(tempFile, in);
1038                                 int tempNum;
1039                                 string group;
1040                                 
1041                                 in >> tempNum; m->gobble(in);
1042                                 
1043                                 if (tempNum != 0) {
1044                                         while (!in.eof()) { 
1045                                                 in >> group >> tempNum; m->gobble(in);
1046                         
1047                                                 map<string, int>::iterator it = groupCounts.find(group);
1048                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1049                                                 else { groupCounts[it->first] += tempNum; }
1050                                         }
1051                                 }
1052                                 in.close(); m->mothurRemove(tempFile);
1053                         }
1054             #endif
1055                 }
1056
1057         return exitCommand;
1058         }
1059         catch(exception& e) {
1060                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1061                 exit(1);
1062         }
1063 }
1064
1065 /**************************************************************************************************/
1066
1067 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1068         try {
1069         
1070         vector<unsigned long long> fastaFilePos;
1071                 vector<unsigned long long> qfileFilePos;
1072                 
1073                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1074                 //set file positions for fasta file
1075                 fastaFilePos = m->divideFile(filename, processors);
1076                 
1077                 if (qfilename == "") { return processors; }
1078                 
1079                 //get name of first sequence in each chunk
1080                 map<string, int> firstSeqNames;
1081                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1082                         ifstream in;
1083                         m->openInputFile(filename, in);
1084                         in.seekg(fastaFilePos[i]);
1085                 
1086                         Sequence temp(in); 
1087                         firstSeqNames[temp.getName()] = i;
1088                 
1089                         in.close();
1090                 }
1091                                 
1092                 //seach for filePos of each first name in the qfile and save in qfileFilePos
1093                 ifstream inQual;
1094                 m->openInputFile(qfilename, inQual);
1095                 
1096                 string input;
1097                 while(!inQual.eof()){   
1098                         input = m->getline(inQual);
1099
1100                         if (input.length() != 0) {
1101                                 if(input[0] == '>'){ //this is a sequence name line
1102                                         istringstream nameStream(input);
1103                                         
1104                                         string sname = "";  nameStream >> sname;
1105                                         sname = sname.substr(1);
1106                                         
1107                                         map<string, int>::iterator it = firstSeqNames.find(sname);
1108                                         
1109                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1110                                                 unsigned long long pos = inQual.tellg(); 
1111                                                 qfileFilePos.push_back(pos - input.length() - 1);       
1112                                                 firstSeqNames.erase(it);
1113                                         }
1114                                 }
1115                         }
1116                         
1117                         if (firstSeqNames.size() == 0) { break; }
1118                 }
1119                 inQual.close();
1120                 
1121                 
1122                 if (firstSeqNames.size() != 0) { 
1123                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1124                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1125                         }
1126                         qFileName = "";
1127                         return processors;
1128                 }
1129
1130                 //get last file position of qfile
1131                 FILE * pFile;
1132                 unsigned long long size;
1133                 
1134                 //get num bytes in file
1135                 pFile = fopen (qfilename.c_str(),"rb");
1136                 if (pFile==NULL) perror ("Error opening file");
1137                 else{
1138                         fseek (pFile, 0, SEEK_END);
1139                         size=ftell (pFile);
1140                         fclose (pFile);
1141                 }
1142                 
1143                 qfileFilePos.push_back(size);
1144         
1145         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1146                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1147                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1148                 }       
1149                 if(qfilename == "")     {       qLines = lines; } //files with duds
1150                 
1151                 return processors;
1152                 
1153                 #else
1154             
1155         if (processors == 1) { //save time
1156                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1157                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1158             lines.push_back(linePair(0, 1000));
1159             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1160         }else{
1161             int numFastaSeqs = 0;
1162             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1163         
1164             if (qfilename != "") { 
1165                 int numQualSeqs = 0;
1166                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1167                 
1168                 if (numFastaSeqs != numQualSeqs) {
1169                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1170                 }
1171             }
1172         
1173             //figure out how many sequences you have to process
1174             int numSeqsPerProcessor = numFastaSeqs / processors;
1175             for (int i = 0; i < processors; i++) {
1176                 int startIndex =  i * numSeqsPerProcessor;
1177                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1178                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1179                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1180                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1181             }
1182         
1183             if(qfilename == "") {       qLines = lines; } //files with duds
1184         }
1185                         return 1;
1186                 
1187                 #endif
1188         }
1189         catch(exception& e) {
1190                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1191                 exit(1);
1192         }
1193 }
1194
1195 //***************************************************************************************************************
1196
1197 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1198         try {
1199                 ifstream inOligos;
1200                 m->openInputFile(oligoFile, inOligos);
1201                 
1202                 ofstream test;
1203                 
1204                 string type, oligo, group;
1205
1206                 int indexPrimer = 0;
1207                 int indexBarcode = 0;
1208                 
1209                 while(!inOligos.eof()){
1210
1211                         inOligos >> type; 
1212                                         
1213                         if(type[0] == '#'){
1214                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1215                                 m->gobble(inOligos);
1216                         }
1217                         else{
1218                                 m->gobble(inOligos);
1219                                 //make type case insensitive
1220                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1221                                 
1222                                 inOligos >> oligo;
1223                                 
1224                                 for(int i=0;i<oligo.length();i++){
1225                                         oligo[i] = toupper(oligo[i]);
1226                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1227                                 }
1228                                 
1229                                 if(type == "FORWARD"){
1230                                         group = "";
1231                                         
1232                                         // get rest of line in case there is a primer name
1233                                         while (!inOligos.eof()) {       
1234                                                 char c = inOligos.get(); 
1235                                                 if (c == 10 || c == 13){        break;  }
1236                                                 else if (c == 32 || c == 9){;} //space or tab
1237                                                 else {  group += c;  }
1238                                         } 
1239                                         
1240                                         //check for repeat barcodes
1241                                         map<string, int>::iterator itPrime = primers.find(oligo);
1242                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1243                                         
1244                                         primers[oligo]=indexPrimer; indexPrimer++;              
1245                                         primerNameVector.push_back(group);
1246                                 }
1247                                 else if(type == "REVERSE"){
1248                                         Sequence oligoRC("reverse", oligo);
1249                                         oligoRC.reverseComplement();
1250                                         revPrimer.push_back(oligoRC.getUnaligned());
1251                                 }
1252                                 else if(type == "BARCODE"){
1253                                         inOligos >> group;
1254                                         
1255                                         //check for repeat barcodes
1256                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1257                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1258                                                 
1259                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1260                                         barcodeNameVector.push_back(group);
1261                                 }else if(type == "LINKER"){
1262                                         linker.push_back(oligo);
1263                                 }else if(type == "SPACER"){
1264                                         spacer.push_back(oligo);
1265                                 }
1266                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1267                         }
1268                         m->gobble(inOligos);
1269                 }       
1270                 inOligos.close();
1271                 
1272                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1273                 
1274                 //add in potential combos
1275                 if(barcodeNameVector.size() == 0){
1276                         barcodes[""] = 0;
1277                         barcodeNameVector.push_back("");                        
1278                 }
1279                 
1280                 if(primerNameVector.size() == 0){
1281                         primers[""] = 0;
1282                         primerNameVector.push_back("");                 
1283                 }
1284                 
1285                 fastaFileNames.resize(barcodeNameVector.size());
1286                 for(int i=0;i<fastaFileNames.size();i++){
1287                         fastaFileNames[i].assign(primerNameVector.size(), "");
1288                 }
1289                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1290                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1291                 
1292                 if(allFiles){
1293                         set<string> uniqueNames; //used to cleanup outputFileNames
1294                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1295                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1296                                         
1297                                         string primerName = primerNameVector[itPrimer->second];
1298                                         string barcodeName = barcodeNameVector[itBar->second];
1299                                         
1300                                         string comboGroupName = "";
1301                                         string fastaFileName = "";
1302                                         string qualFileName = "";
1303                                         string nameFileName = "";
1304                                         
1305                                         if(primerName == ""){
1306                                                 comboGroupName = barcodeNameVector[itBar->second];
1307                                         }
1308                                         else{
1309                                                 if(barcodeName == ""){
1310                                                         comboGroupName = primerNameVector[itPrimer->second];
1311                                                 }
1312                                                 else{
1313                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1314                                                 }
1315                                         }
1316                                         
1317                                         
1318                                         ofstream temp;
1319                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1320                                         if (uniqueNames.count(fastaFileName) == 0) {
1321                                                 outputNames.push_back(fastaFileName);
1322                                                 outputTypes["fasta"].push_back(fastaFileName);
1323                                                 uniqueNames.insert(fastaFileName);
1324                                         }
1325                                         
1326                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1327                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1328                                         
1329                                         if(qFileName != ""){
1330                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1331                                                 if (uniqueNames.count(qualFileName) == 0) {
1332                                                         outputNames.push_back(qualFileName);
1333                                                         outputTypes["qfile"].push_back(qualFileName);
1334                                                 }
1335                                                 
1336                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1337                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1338                                         }
1339                                         
1340                                         if(nameFile != ""){
1341                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1342                                                 if (uniqueNames.count(nameFileName) == 0) {
1343                                                         outputNames.push_back(nameFileName);
1344                                                         outputTypes["name"].push_back(nameFileName);
1345                                                 }
1346                                                 
1347                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1348                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1349                                         }
1350                                         
1351                                 }
1352                         }
1353                 }
1354                 numFPrimers = primers.size();
1355                 numRPrimers = revPrimer.size();
1356         numLinkers = linker.size();
1357         numSpacers = spacer.size();
1358                 
1359                 bool allBlank = true;
1360                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1361                         if (barcodeNameVector[i] != "") {
1362                                 allBlank = false;
1363                                 break;
1364                         }
1365                 }
1366                 for (int i = 0; i < primerNameVector.size(); i++) {
1367                         if (primerNameVector[i] != "") {
1368                                 allBlank = false;
1369                                 break;
1370                         }
1371                 }
1372                 
1373                 if (allBlank) {
1374                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1375                         allFiles = false;
1376                         return false;
1377                 }
1378                 
1379                 return true;
1380                 
1381         }
1382         catch(exception& e) {
1383                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1384                 exit(1);
1385         }
1386 }
1387 //***************************************************************************************************************
1388
1389 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1390         try {
1391                 bool success = 1;
1392                 if(qscores.getName() != ""){
1393                         qscores.trimQScores(-1, keepFirst);
1394                 }
1395                 sequence.trim(keepFirst);
1396                 return success;
1397         }
1398         catch(exception& e) {
1399                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1400                 exit(1);
1401         }
1402         
1403 }       
1404
1405 //***************************************************************************************************************
1406
1407 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1408         try {
1409                 bool success = 0;
1410                 
1411                 int length = sequence.getNumBases() - removeLast;
1412                 
1413                 if(length > 0){
1414                         if(qscores.getName() != ""){
1415                                 qscores.trimQScores(-1, length);
1416                         }
1417                         sequence.trim(length);
1418                         success = 1;
1419                 }
1420                 else{
1421                         success = 0;
1422                 }
1423
1424                 return success;
1425         }
1426         catch(exception& e) {
1427                 m->errorOut(e, "removeLastTrim", "countDiffs");
1428                 exit(1);
1429         }
1430         
1431 }       
1432
1433 //***************************************************************************************************************
1434
1435 bool TrimSeqsCommand::cullLength(Sequence& seq){
1436         try {
1437         
1438                 int length = seq.getNumBases();
1439                 bool success = 0;       //guilty until proven innocent
1440                 
1441                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1442                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1443                 else                                                                                            {       success = 0;    }
1444                 
1445                 return success;
1446         
1447         }
1448         catch(exception& e) {
1449                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1450                 exit(1);
1451         }
1452         
1453 }
1454
1455 //***************************************************************************************************************
1456
1457 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1458         try {
1459                 int longHomoP = seq.getLongHomoPolymer();
1460                 bool success = 0;       //guilty until proven innocent
1461                 
1462                 if(longHomoP <= maxHomoP){      success = 1;    }
1463                 else                                    {       success = 0;    }
1464                 
1465                 return success;
1466         }
1467         catch(exception& e) {
1468                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1469                 exit(1);
1470         }
1471         
1472 }
1473
1474 //***************************************************************************************************************
1475
1476 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1477         try {
1478                 int numNs = seq.getAmbigBases();
1479                 bool success = 0;       //guilty until proven innocent
1480                 
1481                 if(numNs <= maxAmbig)   {       success = 1;    }
1482                 else                                    {       success = 0;    }
1483                 
1484                 return success;
1485         }
1486         catch(exception& e) {
1487                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1488                 exit(1);
1489         }
1490         
1491 }
1492 //***************************************************************************************************************