]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332         numSpacers = 0;
333         numLinkers = 0;
334                 createGroup = false;
335                 vector<vector<string> > fastaFileNames;
336                 vector<vector<string> > qualFileNames;
337                 vector<vector<string> > nameFileNames;
338                 
339                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
340                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
341                 
342                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
343                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
344                 
345                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
346                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
347                 
348                 if (qFileName != "") {
349                         outputNames.push_back(trimQualFile);
350                         outputNames.push_back(scrapQualFile);
351                         outputTypes["qfile"].push_back(trimQualFile);
352                         outputTypes["qfile"].push_back(scrapQualFile); 
353                 }
354                 
355                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
356                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
357                 
358                 if (nameFile != "") {
359                         m->readNames(nameFile, nameMap);
360                         outputNames.push_back(trimNameFile);
361                         outputNames.push_back(scrapNameFile);
362                         outputTypes["name"].push_back(trimNameFile);
363                         outputTypes["name"].push_back(scrapNameFile); 
364                 }
365                 
366                 if (m->control_pressed) { return 0; }
367                 
368                 string outputGroupFileName;
369                 if(oligoFile != ""){
370                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
371                         if (createGroup) {
372                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
373                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
374                         }
375                 }
376        
377         //fills lines and qlines
378                 setLines(fastaFile, qFileName);
379                 
380         if(processors == 1){
381             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
382         }else{
383             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
384         }       
385                 
386                 
387                 if (m->control_pressed) {  return 0; }                  
388         
389                 if(allFiles){
390                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391                         map<string, string>::iterator it;
392                         set<string> namesToRemove;
393                         for(int i=0;i<fastaFileNames.size();i++){
394                                 for(int j=0;j<fastaFileNames[0].size();j++){
395                                         if (fastaFileNames[i][j] != "") {
396                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
397                                                         if(m->isBlank(fastaFileNames[i][j])){
398                                                                 m->mothurRemove(fastaFileNames[i][j]);
399                                                                 namesToRemove.insert(fastaFileNames[i][j]);
400                                                         
401                                                                 if(qFileName != ""){
402                                                                         m->mothurRemove(qualFileNames[i][j]);
403                                                                         namesToRemove.insert(qualFileNames[i][j]);
404                                                                 }
405                                                                 
406                                                                 if(nameFile != ""){
407                                                                         m->mothurRemove(nameFileNames[i][j]);
408                                                                         namesToRemove.insert(nameFileNames[i][j]);
409                                                                 }
410                                                         }else{  
411                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
412                                                                 if (it == uniqueFastaNames.end()) {     
413                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
414                                                                 }       
415                                                         }
416                                                 }
417                                         }
418                                 }
419                         }
420                         
421                         //remove names for outputFileNames, just cleans up the output
422                         vector<string> outputNames2;
423                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
424                         outputNames = outputNames2;
425                         
426                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
427                                 ifstream in;
428                                 m->openInputFile(it->first, in);
429                                 
430                                 ofstream out;
431                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
432                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
433                                 m->openOutputFile(thisGroupName, out);
434                                 
435                                 while (!in.eof()){
436                                         if (m->control_pressed) { break; }
437                                         
438                                         Sequence currSeq(in); m->gobble(in);
439                                         out << currSeq.getName() << '\t' << it->second << endl;
440                                 }
441                                 in.close();
442                                 out.close();
443                         }
444                 }
445                 
446                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
447
448                 //output group counts
449                 m->mothurOutEndLine();
450                 int total = 0;
451                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
452                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
453                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
454                 }
455                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
456                 
457                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
458
459                 //set fasta file as new current fastafile
460                 string current = "";
461                 itTypes = outputTypes.find("fasta");
462                 if (itTypes != outputTypes.end()) {
463                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
464                 }
465                 
466                 itTypes = outputTypes.find("name");
467                 if (itTypes != outputTypes.end()) {
468                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
469                 }
470                 
471                 itTypes = outputTypes.find("qfile");
472                 if (itTypes != outputTypes.end()) {
473                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
474                 }
475                 
476                 itTypes = outputTypes.find("group");
477                 if (itTypes != outputTypes.end()) {
478                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
479                 }
480
481                 m->mothurOutEndLine();
482                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
483                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
484                 m->mothurOutEndLine();
485                 
486                 return 0;       
487                         
488         }
489         catch(exception& e) {
490                 m->errorOut(e, "TrimSeqsCommand", "execute");
491                 exit(1);
492         }
493 }
494                 
495 /**************************************************************************************/
496
497 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
498                 
499         try {
500                 
501                 ofstream trimFASTAFile;
502                 m->openOutputFile(trimFileName, trimFASTAFile);
503                 
504                 ofstream scrapFASTAFile;
505                 m->openOutputFile(scrapFileName, scrapFASTAFile);
506                 
507                 ofstream trimQualFile;
508                 ofstream scrapQualFile;
509                 if(qFileName != ""){
510                         m->openOutputFile(trimQFileName, trimQualFile);
511                         m->openOutputFile(scrapQFileName, scrapQualFile);
512                 }
513                 
514                 ofstream trimNameFile;
515                 ofstream scrapNameFile;
516                 if(nameFile != ""){
517                         m->openOutputFile(trimNFileName, trimNameFile);
518                         m->openOutputFile(scrapNFileName, scrapNameFile);
519                 }
520                 
521                 
522                 ofstream outGroupsFile;
523                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
524                 if(allFiles){
525                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
526                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
527                                         if (fastaFileNames[i][j] != "") {
528                                                 ofstream temp;
529                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
530                                                 if(qFileName != ""){
531                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
532                                                 }
533                                                 
534                                                 if(nameFile != ""){
535                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
536                                                 }
537                                         }
538                                 }
539                         }
540                 }
541                 
542                 ifstream inFASTA;
543                 m->openInputFile(filename, inFASTA);
544                 inFASTA.seekg(line.start);
545                 
546                 ifstream qFile;
547                 if(qFileName != "")     {
548                         m->openInputFile(qFileName, qFile);
549                         qFile.seekg(qline.start);  
550                 }
551                 
552                 int count = 0;
553                 bool moreSeqs = 1;
554                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
555         
556                 while (moreSeqs) {
557                                 
558                         if (m->control_pressed) { 
559                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
560                                 if (createGroup) {       outGroupsFile.close();   }
561
562                                 if(qFileName != ""){
563                                         qFile.close();
564                                 }
565                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
566
567                                 return 0;
568                         }
569                         
570                         int success = 1;
571                         string trashCode = "";
572                         int currentSeqsDiffs = 0;
573
574                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
575                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
576                         QualityScores currQual;
577                         if(qFileName != ""){
578                                 currQual = QualityScores(qFile);  m->gobble(qFile);
579                         }
580                         
581                         string origSeq = currSeq.getUnaligned();
582                         if (origSeq != "") {
583                                 
584                                 int barcodeIndex = 0;
585                                 int primerIndex = 0;
586                                 
587                 if(numLinkers != 0){
588                                         success = trimOligos.stripLinker(currSeq, currQual);
589                                         if(success > ldiffs)            {       trashCode += 'k';       }
590                                         else{ currentSeqsDiffs += success;  }
591
592                                 }
593                 
594                                 if(barcodes.size() != 0){
595                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
596                                         if(success > bdiffs)            {       trashCode += 'b';       }
597                                         else{ currentSeqsDiffs += success;  }
598                                 }
599                                 
600                 if(numSpacers != 0){
601                                         success = trimOligos.stripSpacer(currSeq, currQual);
602                                         if(success > sdiffs)            {       trashCode += 's';       }
603                                         else{ currentSeqsDiffs += success;  }
604
605                                 }
606                 
607                                 if(numFPrimers != 0){
608                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
609                                         if(success > pdiffs)            {       trashCode += 'f';       }
610                                         else{ currentSeqsDiffs += success;  }
611                                 }
612                                 
613                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
614                                 
615                                 if(numRPrimers != 0){
616                                         success = trimOligos.stripReverse(currSeq, currQual);
617                                         if(!success)                            {       trashCode += 'r';       }
618                                 }
619
620                                 if(keepFirst != 0){
621                                         success = keepFirstTrim(currSeq, currQual);
622                                 }
623                                 
624                                 if(removeLast != 0){
625                                         success = removeLastTrim(currSeq, currQual);
626                                         if(!success)                            {       trashCode += 'l';       }
627                                 }
628
629                                 
630                                 if(qFileName != ""){
631                                         int origLength = currSeq.getNumBases();
632                                         
633                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
634                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
635                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
636                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
637                                         else                                            {       success = 1;                            }
638                                         
639                                         //you don't want to trim, if it fails above then scrap it
640                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
641                                         
642                                         if(!success)                            {       trashCode += 'q';       }
643                                 }                               
644                 
645                                 if(minLength > 0 || maxLength > 0){
646                                         success = cullLength(currSeq);
647                                         if(!success)                            {       trashCode += 'l';       }
648                                 }
649                                 if(maxHomoP > 0){
650                                         success = cullHomoP(currSeq);
651                                         if(!success)                            {       trashCode += 'h';       }
652                                 }
653                                 if(maxAmbig != -1){
654                                         success = cullAmbigs(currSeq);
655                                         if(!success)                            {       trashCode += 'n';       }
656                                 }
657                                 
658                                 if(flip){               // should go last                       
659                                         currSeq.reverseComplement();
660                                         if(qFileName != ""){
661                                                 currQual.flipQScores(); 
662                                         }
663                                 }
664                                 
665                                 if(trashCode.length() == 0){
666                                         currSeq.setAligned(currSeq.getUnaligned());
667                                         currSeq.printSequence(trimFASTAFile);
668                                         
669                                         if(qFileName != ""){
670                                                 currQual.printQScores(trimQualFile);
671                                         }
672                                         
673                                         if(nameFile != ""){
674                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
675                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
676                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
677                                         }
678                                         
679                                         if (createGroup) {
680                                                 if(barcodes.size() != 0){
681                                                         string thisGroup = barcodeNameVector[barcodeIndex];
682                                                         if (primers.size() != 0) { 
683                                                                 if (primerNameVector[primerIndex] != "") { 
684                                                                         if(thisGroup != "") {
685                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
686                                                                         }else {
687                                                                                 thisGroup = primerNameVector[primerIndex]; 
688                                                                         }
689                                                                 } 
690                                                         }
691                                                         
692                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
693                                                         
694                                                         if (nameFile != "") {
695                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
696                                                                 if (itName != nameMap.end()) { 
697                                                                         vector<string> thisSeqsNames; 
698                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
699                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
700                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
701                                                                         }
702                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
703                                                         }
704                                                         
705                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
706                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
707                                                         else { groupCounts[it->first]++; }
708                                                                 
709                                                 }
710                                         }
711                                         
712                                         if(allFiles){
713                                                 ofstream output;
714                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
715                                                 currSeq.printSequence(output);
716                                                 output.close();
717                                                 
718                                                 if(qFileName != ""){
719                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
720                                                         currQual.printQScores(output);
721                                                         output.close();                                                 
722                                                 }
723                                                 
724                                                 if(nameFile != ""){
725                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
726                                                         if (itName != nameMap.end()) { 
727                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
728                                                                 output << itName->first << '\t' << itName->second << endl; 
729                                                                 output.close();
730                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
731                                                 }
732                                         }
733                                 }
734                                 else{
735                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
736                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
737                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
738                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
739                                         }
740                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
741                                         currSeq.setUnaligned(origSeq);
742                                         currSeq.setAligned(origSeq);
743                                         currSeq.printSequence(scrapFASTAFile);
744                                         if(qFileName != ""){
745                                                 currQual.printQScores(scrapQualFile);
746                                         }
747                                 }
748                                 count++;
749                         }
750                         
751                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
752                                 unsigned long long pos = inFASTA.tellg();
753                                 if ((pos == -1) || (pos >= line.end)) { break; }
754                         
755                         #else
756                                 if (inFASTA.eof()) { break; }
757                         #endif
758                         
759                         //report progress
760                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
761                         
762                 }
763                 //report progress
764                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
765                 
766                 
767                 inFASTA.close();
768                 trimFASTAFile.close();
769                 scrapFASTAFile.close();
770                 if (createGroup) {       outGroupsFile.close();   }
771                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
772                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
773                 
774                 return count;
775         }
776         catch(exception& e) {
777                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
778                 exit(1);
779         }
780 }
781
782 /**************************************************************************************************/
783
784 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
785         try {
786         
787         int process = 1;
788                 int exitCommand = 1;
789                 processIDS.clear();
790                 
791 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
792                                 //loop through and create all the processes you want
793                 while (process != processors) {
794                         int pid = fork();
795                         
796                         if (pid > 0) {
797                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
798                                 process++;
799                         }else if (pid == 0){
800                                 
801                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
802                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
803                                 vector<vector<string> > tempNameFileNames = nameFileNames;
804
805                                 if(allFiles){
806                                         ofstream temp;
807
808                                         for(int i=0;i<tempFASTAFileNames.size();i++){
809                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
810                                                         if (tempFASTAFileNames[i][j] != "") {
811                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
812                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
813
814                                                                 if(qFileName != ""){
815                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
816                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
817                                                                 }
818                                                                 if(nameFile != ""){
819                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
820                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
821                                                                 }
822                                                         }
823                                                 }
824                                         }
825                                 }
826                                                         
827                                 driverCreateTrim(filename,
828                                                                  qFileName,
829                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
830                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
831                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
832                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
833                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
834                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
835                                                                  (groupFile + toString(getpid()) + ".temp"),
836                                                                  tempFASTAFileNames,
837                                                                  tempPrimerQualFileNames,
838                                                                  tempNameFileNames,
839                                                                  lines[process],
840                                                                  qLines[process]);
841                                 
842                                 //pass groupCounts to parent
843                                 if(createGroup){
844                                         ofstream out;
845                                         string tempFile = filename + toString(getpid()) + ".num.temp";
846                                         m->openOutputFile(tempFile, out);
847                                         
848                                         out << groupCounts.size() << endl;
849                                         
850                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
851                                                 out << it->first << '\t' << it->second << endl;
852                                         }
853                                         out.close();
854                                 }
855                                 exit(0);
856                         }else { 
857                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
858                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
859                                 exit(0);
860                         }
861                 }
862                 
863                 //parent do my part
864                 ofstream temp;
865                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
866                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
867                 if(qFileName != ""){
868                         m->openOutputFile(trimQualFileName, temp);              temp.close();
869                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
870                 }
871                 if (nameFile != "") {
872                         m->openOutputFile(trimNameFileName, temp);              temp.close();
873                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
874                 }
875
876                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
877                 
878                 //force parent to wait until all the processes are done
879                 for (int i=0;i<processIDS.size();i++) { 
880                         int temp = processIDS[i];
881                         wait(&temp);
882                 }
883 #else
884         //////////////////////////////////////////////////////////////////////////////////////////////////////
885                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
886                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
887                 //////////////////////////////////////////////////////////////////////////////////////////////////////
888                 
889                 vector<trimData*> pDataArray; 
890                 DWORD   dwThreadIdArray[processors-1];
891                 HANDLE  hThreadArray[processors-1]; 
892                 
893                 //Create processor worker threads.
894                 for( int i=0; i<processors-1; i++){
895                         
896             string extension = "";
897                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
898             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
899             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
900             vector<vector<string> > tempNameFileNames = nameFileNames;
901             
902             if(allFiles){
903                 ofstream temp;
904                 
905                 for(int i=0;i<tempFASTAFileNames.size();i++){
906                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
907                         if (tempFASTAFileNames[i][j] != "") {
908                             tempFASTAFileNames[i][j] += extension;
909                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
910                             
911                             if(qFileName != ""){
912                                 tempPrimerQualFileNames[i][j] += extension;
913                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
914                             }
915                             if(nameFile != ""){
916                                 tempNameFileNames[i][j] += extension;
917                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
918                             }
919                         }
920                     }
921                 }
922             }
923
924             
925                         trimData* tempTrim = new trimData(filename,
926                                               qFileName, nameFile,
927                                               (trimFASTAFileName+extension),
928                                               (scrapFASTAFileName+extension),
929                                               (trimQualFileName+extension),
930                                               (scrapQualFileName+extension),
931                                               (trimNameFileName+extension),
932                                               (scrapNameFileName+extension),
933                                               (groupFile+extension),
934                                               tempFASTAFileNames,
935                                               tempPrimerQualFileNames,
936                                               tempNameFileNames,
937                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
938                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
939                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
940                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
941                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
942                         pDataArray.push_back(tempTrim);
943             
944                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
945                 }
946         
947         //parent do my part
948                 ofstream temp;
949                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
950                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
951                 if(qFileName != ""){
952                         m->openOutputFile(trimQualFileName, temp);              temp.close();
953                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
954                 }
955                 if (nameFile != "") {
956                         m->openOutputFile(trimNameFileName, temp);              temp.close();
957                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
958                 }
959         
960                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
961         processIDS.push_back(processors-1);
962
963         
964                 //Wait until all threads have terminated.
965                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
966                 
967                 //Close all thread handles and free memory allocations.
968                 for(int i=0; i < pDataArray.size(); i++){
969                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
970                 map<string, int>::iterator it2 = groupCounts.find(it->first);
971                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
972                 else { groupCounts[it->first] += it->second; }
973             }
974             CloseHandle(hThreadArray[i]);
975                         delete pDataArray[i];
976                 }
977         
978 #endif          
979         
980         
981         //append files
982                 for(int i=0;i<processIDS.size();i++){
983                         
984                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
985                         
986                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
987                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
988                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
989                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
990                         
991                         if(qFileName != ""){
992                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
993                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
994                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
995                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
996                         }
997                         
998                         if(nameFile != ""){
999                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1000                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1001                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1002                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1003                         }
1004                         
1005                         if(createGroup){
1006                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1007                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1008                         }
1009                         
1010                         
1011                         if(allFiles){
1012                                 for(int j=0;j<fastaFileNames.size();j++){
1013                                         for(int k=0;k<fastaFileNames[j].size();k++){
1014                                                 if (fastaFileNames[j][k] != "") {
1015                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1016                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1017                                                         
1018                                                         if(qFileName != ""){
1019                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1020                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1021                                                         }
1022                                                         
1023                                                         if(nameFile != ""){
1024                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1025                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1026                                                         }
1027                                                 }
1028                                         }
1029                                 }
1030                         }
1031                         
1032             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1033                         if(createGroup){
1034                                 ifstream in;
1035                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1036                                 m->openInputFile(tempFile, in);
1037                                 int tempNum;
1038                                 string group;
1039                                 
1040                                 in >> tempNum; m->gobble(in);
1041                                 
1042                                 if (tempNum != 0) {
1043                                         while (!in.eof()) { 
1044                                                 in >> group >> tempNum; m->gobble(in);
1045                         
1046                                                 map<string, int>::iterator it = groupCounts.find(group);
1047                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1048                                                 else { groupCounts[it->first] += tempNum; }
1049                                         }
1050                                 }
1051                                 in.close(); m->mothurRemove(tempFile);
1052                         }
1053             #endif
1054                 }
1055
1056         return exitCommand;
1057         }
1058         catch(exception& e) {
1059                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1060                 exit(1);
1061         }
1062 }
1063
1064 /**************************************************************************************************/
1065
1066 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1067         try {
1068         
1069         vector<unsigned long long> fastaFilePos;
1070                 vector<unsigned long long> qfileFilePos;
1071                 
1072                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1073                 //set file positions for fasta file
1074                 fastaFilePos = m->divideFile(filename, processors);
1075                 
1076                 //get name of first sequence in each chunk
1077                 map<string, int> firstSeqNames;
1078                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1079                         ifstream in;
1080                         m->openInputFile(filename, in);
1081                         in.seekg(fastaFilePos[i]);
1082                 
1083                         Sequence temp(in); 
1084                         firstSeqNames[temp.getName()] = i;
1085                 
1086                         in.close();
1087                 }
1088                 
1089                 if(qfilename != "")     {
1090             //seach for filePos of each first name in the qfile and save in qfileFilePos
1091             ifstream inQual;
1092             m->openInputFile(qfilename, inQual);
1093             
1094             string input;
1095             while(!inQual.eof()){       
1096                 input = m->getline(inQual);
1097                 
1098                 if (input.length() != 0) {
1099                     if(input[0] == '>'){ //this is a sequence name line
1100                         istringstream nameStream(input);
1101                         
1102                         string sname = "";  nameStream >> sname;
1103                         sname = sname.substr(1);
1104                         
1105                         map<string, int>::iterator it = firstSeqNames.find(sname);
1106                         
1107                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1108                             unsigned long long pos = inQual.tellg(); 
1109                             qfileFilePos.push_back(pos - input.length() - 1);   
1110                             firstSeqNames.erase(it);
1111                         }
1112                     }
1113                 }
1114                 
1115                 if (firstSeqNames.size() == 0) { break; }
1116             }
1117             inQual.close();
1118             
1119             
1120             if (firstSeqNames.size() != 0) { 
1121                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1122                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1123                 }
1124                 qFileName = "";
1125                 return processors;
1126             }
1127             
1128             //get last file position of qfile
1129             FILE * pFile;
1130             unsigned long long size;
1131             
1132             //get num bytes in file
1133             pFile = fopen (qfilename.c_str(),"rb");
1134             if (pFile==NULL) perror ("Error opening file");
1135             else{
1136                 fseek (pFile, 0, SEEK_END);
1137                 size=ftell (pFile);
1138                 fclose (pFile);
1139             }
1140             
1141             qfileFilePos.push_back(size);
1142         }
1143         
1144         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1145                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1146                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1147                 }       
1148                 if(qfilename == "")     {       qLines = lines; } //files with duds
1149                 
1150                 return processors;
1151                 
1152                 #else
1153             
1154         if (processors == 1) { //save time
1155                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1156                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1157             lines.push_back(linePair(0, 1000));
1158             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1159         }else{
1160             int numFastaSeqs = 0;
1161             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1162             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1163         
1164             if (qfilename != "") { 
1165                 int numQualSeqs = 0;
1166                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1167                 
1168                 if (numFastaSeqs != numQualSeqs) {
1169                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1170                 }
1171             }
1172         
1173             //figure out how many sequences you have to process
1174             int numSeqsPerProcessor = numFastaSeqs / processors;
1175             for (int i = 0; i < processors; i++) {
1176                 int startIndex =  i * numSeqsPerProcessor;
1177                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1178                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1179                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1180                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1181             }
1182         
1183             if(qfilename == "") {       qLines = lines; } //files with duds
1184         }
1185                         return 1;
1186                 
1187                 #endif
1188         }
1189         catch(exception& e) {
1190                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1191                 exit(1);
1192         }
1193 }
1194
1195 //***************************************************************************************************************
1196
1197 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1198         try {
1199                 ifstream inOligos;
1200                 m->openInputFile(oligoFile, inOligos);
1201                 
1202                 ofstream test;
1203                 
1204                 string type, oligo, group;
1205
1206                 int indexPrimer = 0;
1207                 int indexBarcode = 0;
1208                 
1209                 while(!inOligos.eof()){
1210
1211                         inOligos >> type; 
1212                                         
1213                         if(type[0] == '#'){
1214                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1215                                 m->gobble(inOligos);
1216                         }
1217                         else{
1218                                 m->gobble(inOligos);
1219                                 //make type case insensitive
1220                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1221                                 
1222                                 inOligos >> oligo;
1223                                 
1224                                 for(int i=0;i<oligo.length();i++){
1225                                         oligo[i] = toupper(oligo[i]);
1226                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1227                                 }
1228                                 
1229                                 if(type == "FORWARD"){
1230                                         group = "";
1231                                         
1232                                         // get rest of line in case there is a primer name
1233                                         while (!inOligos.eof()) {       
1234                                                 char c = inOligos.get(); 
1235                                                 if (c == 10 || c == 13){        break;  }
1236                                                 else if (c == 32 || c == 9){;} //space or tab
1237                                                 else {  group += c;  }
1238                                         } 
1239                                         
1240                                         //check for repeat barcodes
1241                                         map<string, int>::iterator itPrime = primers.find(oligo);
1242                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1243                                         
1244                                         primers[oligo]=indexPrimer; indexPrimer++;              
1245                                         primerNameVector.push_back(group);
1246                                 }
1247                                 else if(type == "REVERSE"){
1248                                         //Sequence oligoRC("reverse", oligo);
1249                                         //oligoRC.reverseComplement();
1250                     string oligoRC = reverseOligo(oligo);
1251                                         revPrimer.push_back(oligoRC);
1252                                 }
1253                                 else if(type == "BARCODE"){
1254                                         inOligos >> group;
1255                                         
1256                                         //check for repeat barcodes
1257                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1258                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1259                                                 
1260                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1261                                         barcodeNameVector.push_back(group);
1262                                 }else if(type == "LINKER"){
1263                                         linker.push_back(oligo);
1264                                 }else if(type == "SPACER"){
1265                                         spacer.push_back(oligo);
1266                                 }
1267                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1268                         }
1269                         m->gobble(inOligos);
1270                 }       
1271                 inOligos.close();
1272                 
1273                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1274                 
1275                 //add in potential combos
1276                 if(barcodeNameVector.size() == 0){
1277                         barcodes[""] = 0;
1278                         barcodeNameVector.push_back("");                        
1279                 }
1280                 
1281                 if(primerNameVector.size() == 0){
1282                         primers[""] = 0;
1283                         primerNameVector.push_back("");                 
1284                 }
1285                 
1286                 fastaFileNames.resize(barcodeNameVector.size());
1287                 for(int i=0;i<fastaFileNames.size();i++){
1288                         fastaFileNames[i].assign(primerNameVector.size(), "");
1289                 }
1290                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1291                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1292                 
1293                 if(allFiles){
1294                         set<string> uniqueNames; //used to cleanup outputFileNames
1295                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1296                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1297                                         
1298                                         string primerName = primerNameVector[itPrimer->second];
1299                                         string barcodeName = barcodeNameVector[itBar->second];
1300                                         
1301                                         string comboGroupName = "";
1302                                         string fastaFileName = "";
1303                                         string qualFileName = "";
1304                                         string nameFileName = "";
1305                                         
1306                                         if(primerName == ""){
1307                                                 comboGroupName = barcodeNameVector[itBar->second];
1308                                         }
1309                                         else{
1310                                                 if(barcodeName == ""){
1311                                                         comboGroupName = primerNameVector[itPrimer->second];
1312                                                 }
1313                                                 else{
1314                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1315                                                 }
1316                                         }
1317                                         
1318                                         
1319                                         ofstream temp;
1320                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1321                                         if (uniqueNames.count(fastaFileName) == 0) {
1322                                                 outputNames.push_back(fastaFileName);
1323                                                 outputTypes["fasta"].push_back(fastaFileName);
1324                                                 uniqueNames.insert(fastaFileName);
1325                                         }
1326                                         
1327                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1328                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1329                                         
1330                                         if(qFileName != ""){
1331                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1332                                                 if (uniqueNames.count(qualFileName) == 0) {
1333                                                         outputNames.push_back(qualFileName);
1334                                                         outputTypes["qfile"].push_back(qualFileName);
1335                                                 }
1336                                                 
1337                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1338                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1339                                         }
1340                                         
1341                                         if(nameFile != ""){
1342                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1343                                                 if (uniqueNames.count(nameFileName) == 0) {
1344                                                         outputNames.push_back(nameFileName);
1345                                                         outputTypes["name"].push_back(nameFileName);
1346                                                 }
1347                                                 
1348                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1349                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1350                                         }
1351                                         
1352                                 }
1353                         }
1354                 }
1355                 numFPrimers = primers.size();
1356                 numRPrimers = revPrimer.size();
1357         numLinkers = linker.size();
1358         numSpacers = spacer.size();
1359                 
1360                 bool allBlank = true;
1361                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1362                         if (barcodeNameVector[i] != "") {
1363                                 allBlank = false;
1364                                 break;
1365                         }
1366                 }
1367                 for (int i = 0; i < primerNameVector.size(); i++) {
1368                         if (primerNameVector[i] != "") {
1369                                 allBlank = false;
1370                                 break;
1371                         }
1372                 }
1373                 
1374                 if (allBlank) {
1375                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1376                         allFiles = false;
1377                         return false;
1378                 }
1379                 
1380                 return true;
1381                 
1382         }
1383         catch(exception& e) {
1384                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1385                 exit(1);
1386         }
1387 }
1388 //***************************************************************************************************************
1389
1390 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1391         try {
1392                 bool success = 1;
1393                 if(qscores.getName() != ""){
1394                         qscores.trimQScores(-1, keepFirst);
1395                 }
1396                 sequence.trim(keepFirst);
1397                 return success;
1398         }
1399         catch(exception& e) {
1400                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1401                 exit(1);
1402         }
1403         
1404 }       
1405
1406 //***************************************************************************************************************
1407
1408 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1409         try {
1410                 bool success = 0;
1411                 
1412                 int length = sequence.getNumBases() - removeLast;
1413                 
1414                 if(length > 0){
1415                         if(qscores.getName() != ""){
1416                                 qscores.trimQScores(-1, length);
1417                         }
1418                         sequence.trim(length);
1419                         success = 1;
1420                 }
1421                 else{
1422                         success = 0;
1423                 }
1424
1425                 return success;
1426         }
1427         catch(exception& e) {
1428                 m->errorOut(e, "removeLastTrim", "countDiffs");
1429                 exit(1);
1430         }
1431         
1432 }       
1433
1434 //***************************************************************************************************************
1435
1436 bool TrimSeqsCommand::cullLength(Sequence& seq){
1437         try {
1438         
1439                 int length = seq.getNumBases();
1440                 bool success = 0;       //guilty until proven innocent
1441                 
1442                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1443                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1444                 else                                                                                            {       success = 0;    }
1445                 
1446                 return success;
1447         
1448         }
1449         catch(exception& e) {
1450                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1451                 exit(1);
1452         }
1453         
1454 }
1455
1456 //***************************************************************************************************************
1457
1458 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1459         try {
1460                 int longHomoP = seq.getLongHomoPolymer();
1461                 bool success = 0;       //guilty until proven innocent
1462                 
1463                 if(longHomoP <= maxHomoP){      success = 1;    }
1464                 else                                    {       success = 0;    }
1465                 
1466                 return success;
1467         }
1468         catch(exception& e) {
1469                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1470                 exit(1);
1471         }
1472         
1473 }
1474 //********************************************************************/
1475 string TrimSeqsCommand::reverseOligo(string oligo){
1476         try {
1477         string reverse = "";
1478         
1479         for(int i=oligo.length()-1;i>=0;i--){
1480             
1481             if(oligo[i] == 'A')         {       reverse += 'T'; }
1482             else if(oligo[i] == 'T'){   reverse += 'A'; }
1483             else if(oligo[i] == 'U'){   reverse += 'A'; }
1484             
1485             else if(oligo[i] == 'G'){   reverse += 'C'; }
1486             else if(oligo[i] == 'C'){   reverse += 'G'; }
1487             
1488             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1489             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1490             
1491             else if(oligo[i] == 'M'){   reverse += 'K'; }
1492             else if(oligo[i] == 'K'){   reverse += 'M'; }
1493             
1494             else if(oligo[i] == 'W'){   reverse += 'W'; }
1495             else if(oligo[i] == 'S'){   reverse += 'S'; }
1496             
1497             else if(oligo[i] == 'B'){   reverse += 'V'; }
1498             else if(oligo[i] == 'V'){   reverse += 'B'; }
1499             
1500             else if(oligo[i] == 'D'){   reverse += 'H'; }
1501             else if(oligo[i] == 'H'){   reverse += 'D'; }
1502             
1503             else                                                {       reverse += 'N'; }
1504         }
1505         
1506         
1507         return reverse;
1508     }
1509         catch(exception& e) {
1510                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1511                 exit(1);
1512         }
1513 }
1514
1515 //***************************************************************************************************************
1516
1517 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1518         try {
1519                 int numNs = seq.getAmbigBases();
1520                 bool success = 0;       //guilty until proven innocent
1521                 
1522                 if(numNs <= maxAmbig)   {       success = 1;    }
1523                 else                                    {       success = 0;    }
1524                 
1525                 return success;
1526         }
1527         catch(exception& e) {
1528                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1529                 exit(1);
1530         }
1531         
1532 }
1533 //***************************************************************************************************************