]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
Revert to previous commit
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332         numSpacers = 0;
333         numLinkers = 0;
334                 createGroup = false;
335                 vector<vector<string> > fastaFileNames;
336                 vector<vector<string> > qualFileNames;
337                 vector<vector<string> > nameFileNames;
338                 
339                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
340                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
341                 
342                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
343                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
344                 
345                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
346                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
347                 
348                 if (qFileName != "") {
349                         outputNames.push_back(trimQualFile);
350                         outputNames.push_back(scrapQualFile);
351                         outputTypes["qfile"].push_back(trimQualFile);
352                         outputTypes["qfile"].push_back(scrapQualFile); 
353                 }
354                 
355                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
356                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
357                 
358                 if (nameFile != "") {
359                         m->readNames(nameFile, nameMap);
360                         outputNames.push_back(trimNameFile);
361                         outputNames.push_back(scrapNameFile);
362                         outputTypes["name"].push_back(trimNameFile);
363                         outputTypes["name"].push_back(scrapNameFile); 
364                 }
365                 
366                 if (m->control_pressed) { return 0; }
367                 
368                 string outputGroupFileName;
369                 if(oligoFile != ""){
370                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
371                         if (createGroup) {
372                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
373                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
374                         }
375                 }
376        
377         //fills lines and qlines
378                 setLines(fastaFile, qFileName);
379                 
380         if(processors == 1){
381             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
382         }else{
383             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
384         }       
385                 
386                 
387                 if (m->control_pressed) {  return 0; }                  
388         
389                 if(allFiles){
390                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391                         map<string, string>::iterator it;
392                         set<string> namesToRemove;
393                         for(int i=0;i<fastaFileNames.size();i++){
394                                 for(int j=0;j<fastaFileNames[0].size();j++){
395                                         if (fastaFileNames[i][j] != "") {
396                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
397                                                         if(m->isBlank(fastaFileNames[i][j])){
398                                                                 m->mothurRemove(fastaFileNames[i][j]);
399                                                                 namesToRemove.insert(fastaFileNames[i][j]);
400                                                         
401                                                                 if(qFileName != ""){
402                                                                         m->mothurRemove(qualFileNames[i][j]);
403                                                                         namesToRemove.insert(qualFileNames[i][j]);
404                                                                 }
405                                                                 
406                                                                 if(nameFile != ""){
407                                                                         m->mothurRemove(nameFileNames[i][j]);
408                                                                         namesToRemove.insert(nameFileNames[i][j]);
409                                                                 }
410                                                         }else{  
411                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
412                                                                 if (it == uniqueFastaNames.end()) {     
413                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
414                                                                 }       
415                                                         }
416                                                 }
417                                         }
418                                 }
419                         }
420                         
421                         //remove names for outputFileNames, just cleans up the output
422                         vector<string> outputNames2;
423                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
424                         outputNames = outputNames2;
425                         
426                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
427                                 ifstream in;
428                                 m->openInputFile(it->first, in);
429                                 
430                                 ofstream out;
431                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
432                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
433                                 m->openOutputFile(thisGroupName, out);
434                                 
435                                 while (!in.eof()){
436                                         if (m->control_pressed) { break; }
437                                         
438                                         Sequence currSeq(in); m->gobble(in);
439                                         out << currSeq.getName() << '\t' << it->second << endl;
440                     
441                     if (nameFile != "") {
442                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
443                         if (itName != nameMap.end()) { 
444                             vector<string> thisSeqsNames; 
445                             m->splitAtChar(itName->second, thisSeqsNames, ',');
446                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
447                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
448                             }
449                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
450                     }
451                                 }
452                                 in.close();
453                                 out.close();
454                         }
455                 }
456                 
457                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
458
459                 //output group counts
460                 m->mothurOutEndLine();
461                 int total = 0;
462                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
463                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
464                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
465                 }
466                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
467                 
468                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
469
470                 //set fasta file as new current fastafile
471                 string current = "";
472                 itTypes = outputTypes.find("fasta");
473                 if (itTypes != outputTypes.end()) {
474                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
475                 }
476                 
477                 itTypes = outputTypes.find("name");
478                 if (itTypes != outputTypes.end()) {
479                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
480                 }
481                 
482                 itTypes = outputTypes.find("qfile");
483                 if (itTypes != outputTypes.end()) {
484                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
485                 }
486                 
487                 itTypes = outputTypes.find("group");
488                 if (itTypes != outputTypes.end()) {
489                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
490                 }
491
492                 m->mothurOutEndLine();
493                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
494                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
495                 m->mothurOutEndLine();
496                 
497                 return 0;       
498                         
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "TrimSeqsCommand", "execute");
502                 exit(1);
503         }
504 }
505                 
506 /**************************************************************************************/
507
508 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
509                 
510         try {
511                 
512                 ofstream trimFASTAFile;
513                 m->openOutputFile(trimFileName, trimFASTAFile);
514                 
515                 ofstream scrapFASTAFile;
516                 m->openOutputFile(scrapFileName, scrapFASTAFile);
517                 
518                 ofstream trimQualFile;
519                 ofstream scrapQualFile;
520                 if(qFileName != ""){
521                         m->openOutputFile(trimQFileName, trimQualFile);
522                         m->openOutputFile(scrapQFileName, scrapQualFile);
523                 }
524                 
525                 ofstream trimNameFile;
526                 ofstream scrapNameFile;
527                 if(nameFile != ""){
528                         m->openOutputFile(trimNFileName, trimNameFile);
529                         m->openOutputFile(scrapNFileName, scrapNameFile);
530                 }
531                 
532                 
533                 ofstream outGroupsFile;
534                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
535                 if(allFiles){
536                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
537                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
538                                         if (fastaFileNames[i][j] != "") {
539                                                 ofstream temp;
540                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
541                                                 if(qFileName != ""){
542                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
543                                                 }
544                                                 
545                                                 if(nameFile != ""){
546                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
547                                                 }
548                                         }
549                                 }
550                         }
551                 }
552                 
553                 ifstream inFASTA;
554                 m->openInputFile(filename, inFASTA);
555                 inFASTA.seekg(line.start);
556                 
557                 ifstream qFile;
558                 if(qFileName != "")     {
559                         m->openInputFile(qFileName, qFile);
560                         qFile.seekg(qline.start);  
561                 }
562                 
563                 int count = 0;
564                 bool moreSeqs = 1;
565                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
566         
567                 while (moreSeqs) {
568                                 
569                         if (m->control_pressed) { 
570                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
571                                 if (createGroup) {       outGroupsFile.close();   }
572
573                                 if(qFileName != ""){
574                                         qFile.close();
575                                 }
576                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
577
578                                 return 0;
579                         }
580                         
581                         int success = 1;
582                         string trashCode = "";
583                         int currentSeqsDiffs = 0;
584
585                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
586                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
587                         QualityScores currQual;
588                         if(qFileName != ""){
589                                 currQual = QualityScores(qFile);  m->gobble(qFile);
590                         }
591                         
592                         string origSeq = currSeq.getUnaligned();
593                         if (origSeq != "") {
594                                 
595                                 int barcodeIndex = 0;
596                                 int primerIndex = 0;
597                                 
598                 if(numLinkers != 0){
599                                         success = trimOligos.stripLinker(currSeq, currQual);
600                                         if(success > ldiffs)            {       trashCode += 'k';       }
601                                         else{ currentSeqsDiffs += success;  }
602
603                                 }
604                 
605                                 if(barcodes.size() != 0){
606                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
607                                         if(success > bdiffs)            {       trashCode += 'b';       }
608                                         else{ currentSeqsDiffs += success;  }
609                                 }
610                                 
611                 if(numSpacers != 0){
612                                         success = trimOligos.stripSpacer(currSeq, currQual);
613                                         if(success > sdiffs)            {       trashCode += 's';       }
614                                         else{ currentSeqsDiffs += success;  }
615
616                                 }
617                 
618                                 if(numFPrimers != 0){
619                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
620                                         if(success > pdiffs)            {       trashCode += 'f';       }
621                                         else{ currentSeqsDiffs += success;  }
622                                 }
623                                 
624                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
625                                 
626                                 if(numRPrimers != 0){
627                                         success = trimOligos.stripReverse(currSeq, currQual);
628                                         if(!success)                            {       trashCode += 'r';       }
629                                 }
630
631                                 if(keepFirst != 0){
632                                         success = keepFirstTrim(currSeq, currQual);
633                                 }
634                                 
635                                 if(removeLast != 0){
636                                         success = removeLastTrim(currSeq, currQual);
637                                         if(!success)                            {       trashCode += 'l';       }
638                                 }
639
640                                 
641                                 if(qFileName != ""){
642                                         int origLength = currSeq.getNumBases();
643                                         
644                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
645                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
646                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
647                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
648                                         else                                            {       success = 1;                            }
649                                         
650                                         //you don't want to trim, if it fails above then scrap it
651                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
652                                         
653                                         if(!success)                            {       trashCode += 'q';       }
654                                 }                               
655                 
656                                 if(minLength > 0 || maxLength > 0){
657                                         success = cullLength(currSeq);
658                                         if(!success)                            {       trashCode += 'l';       }
659                                 }
660                                 if(maxHomoP > 0){
661                                         success = cullHomoP(currSeq);
662                                         if(!success)                            {       trashCode += 'h';       }
663                                 }
664                                 if(maxAmbig != -1){
665                                         success = cullAmbigs(currSeq);
666                                         if(!success)                            {       trashCode += 'n';       }
667                                 }
668                                 
669                                 if(flip){               // should go last                       
670                                         currSeq.reverseComplement();
671                                         if(qFileName != ""){
672                                                 currQual.flipQScores(); 
673                                         }
674                                 }
675                                 
676                                 if(trashCode.length() == 0){
677                                         currSeq.setAligned(currSeq.getUnaligned());
678                                         currSeq.printSequence(trimFASTAFile);
679                                         
680                                         if(qFileName != ""){
681                                                 currQual.printQScores(trimQualFile);
682                                         }
683                                         
684                                         if(nameFile != ""){
685                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
686                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
687                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
688                                         }
689                                         
690                                         if (createGroup) {
691                                                 if(barcodes.size() != 0){
692                                                         string thisGroup = barcodeNameVector[barcodeIndex];
693                                                         if (primers.size() != 0) { 
694                                                                 if (primerNameVector[primerIndex] != "") { 
695                                                                         if(thisGroup != "") {
696                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
697                                                                         }else {
698                                                                                 thisGroup = primerNameVector[primerIndex]; 
699                                                                         }
700                                                                 } 
701                                                         }
702                                                         
703                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
704                                                         
705                                                         if (nameFile != "") {
706                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
707                                                                 if (itName != nameMap.end()) { 
708                                                                         vector<string> thisSeqsNames; 
709                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
710                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
711                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
712                                                                         }
713                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
714                                                         }
715                                                         
716                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
717                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
718                                                         else { groupCounts[it->first]++; }
719                                                                 
720                                                 }
721                                         }
722                                         
723                                         if(allFiles){
724                                                 ofstream output;
725                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
726                                                 currSeq.printSequence(output);
727                                                 output.close();
728                                                 
729                                                 if(qFileName != ""){
730                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
731                                                         currQual.printQScores(output);
732                                                         output.close();                                                 
733                                                 }
734                                                 
735                                                 if(nameFile != ""){
736                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
737                                                         if (itName != nameMap.end()) { 
738                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
739                                                                 output << itName->first << '\t' << itName->second << endl; 
740                                                                 output.close();
741                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
742                                                 }
743                                         }
744                                 }
745                                 else{
746                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
747                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
748                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
749                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
750                                         }
751                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
752                                         currSeq.setUnaligned(origSeq);
753                                         currSeq.setAligned(origSeq);
754                                         currSeq.printSequence(scrapFASTAFile);
755                                         if(qFileName != ""){
756                                                 currQual.printQScores(scrapQualFile);
757                                         }
758                                 }
759                                 count++;
760                         }
761                         
762                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
763                                 unsigned long long pos = inFASTA.tellg();
764                                 if ((pos == -1) || (pos >= line.end)) { break; }
765                         
766                         #else
767                                 if (inFASTA.eof()) { break; }
768                         #endif
769                         
770                         //report progress
771                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
772                         
773                 }
774                 //report progress
775                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
776                 
777                 
778                 inFASTA.close();
779                 trimFASTAFile.close();
780                 scrapFASTAFile.close();
781                 if (createGroup) {       outGroupsFile.close();   }
782                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
783                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
784                 
785                 return count;
786         }
787         catch(exception& e) {
788                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
789                 exit(1);
790         }
791 }
792
793 /**************************************************************************************************/
794
795 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
796         try {
797         
798         int process = 1;
799                 int exitCommand = 1;
800                 processIDS.clear();
801                 
802 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
803                                 //loop through and create all the processes you want
804                 while (process != processors) {
805                         int pid = fork();
806                         
807                         if (pid > 0) {
808                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
809                                 process++;
810                         }else if (pid == 0){
811                                 
812                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
813                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
814                                 vector<vector<string> > tempNameFileNames = nameFileNames;
815
816                                 if(allFiles){
817                                         ofstream temp;
818
819                                         for(int i=0;i<tempFASTAFileNames.size();i++){
820                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
821                                                         if (tempFASTAFileNames[i][j] != "") {
822                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
823                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
824
825                                                                 if(qFileName != ""){
826                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
827                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
828                                                                 }
829                                                                 if(nameFile != ""){
830                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
831                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
832                                                                 }
833                                                         }
834                                                 }
835                                         }
836                                 }
837                                                         
838                                 driverCreateTrim(filename,
839                                                                  qFileName,
840                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
841                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
842                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
843                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
844                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
845                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
846                                                                  (groupFile + toString(getpid()) + ".temp"),
847                                                                  tempFASTAFileNames,
848                                                                  tempPrimerQualFileNames,
849                                                                  tempNameFileNames,
850                                                                  lines[process],
851                                                                  qLines[process]);
852                                 
853                                 //pass groupCounts to parent
854                                 if(createGroup){
855                                         ofstream out;
856                                         string tempFile = filename + toString(getpid()) + ".num.temp";
857                                         m->openOutputFile(tempFile, out);
858                                         
859                                         out << groupCounts.size() << endl;
860                                         
861                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
862                                                 out << it->first << '\t' << it->second << endl;
863                                         }
864                                         out.close();
865                                 }
866                                 exit(0);
867                         }else { 
868                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
869                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
870                                 exit(0);
871                         }
872                 }
873                 
874                 //parent do my part
875                 ofstream temp;
876                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
877                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
878                 if(qFileName != ""){
879                         m->openOutputFile(trimQualFileName, temp);              temp.close();
880                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
881                 }
882                 if (nameFile != "") {
883                         m->openOutputFile(trimNameFileName, temp);              temp.close();
884                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
885                 }
886
887                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
888                 
889                 //force parent to wait until all the processes are done
890                 for (int i=0;i<processIDS.size();i++) { 
891                         int temp = processIDS[i];
892                         wait(&temp);
893                 }
894 #else
895         //////////////////////////////////////////////////////////////////////////////////////////////////////
896                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
897                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
898                 //////////////////////////////////////////////////////////////////////////////////////////////////////
899                 
900                 vector<trimData*> pDataArray; 
901                 DWORD   dwThreadIdArray[processors-1];
902                 HANDLE  hThreadArray[processors-1]; 
903                 
904                 //Create processor worker threads.
905                 for( int i=0; i<processors-1; i++){
906                         
907             string extension = "";
908                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
909             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
910             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
911             vector<vector<string> > tempNameFileNames = nameFileNames;
912             
913             if(allFiles){
914                 ofstream temp;
915                 
916                 for(int i=0;i<tempFASTAFileNames.size();i++){
917                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
918                         if (tempFASTAFileNames[i][j] != "") {
919                             tempFASTAFileNames[i][j] += extension;
920                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
921                             
922                             if(qFileName != ""){
923                                 tempPrimerQualFileNames[i][j] += extension;
924                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
925                             }
926                             if(nameFile != ""){
927                                 tempNameFileNames[i][j] += extension;
928                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
929                             }
930                         }
931                     }
932                 }
933             }
934
935             
936                         trimData* tempTrim = new trimData(filename,
937                                               qFileName, nameFile,
938                                               (trimFASTAFileName+extension),
939                                               (scrapFASTAFileName+extension),
940                                               (trimQualFileName+extension),
941                                               (scrapQualFileName+extension),
942                                               (trimNameFileName+extension),
943                                               (scrapNameFileName+extension),
944                                               (groupFile+extension),
945                                               tempFASTAFileNames,
946                                               tempPrimerQualFileNames,
947                                               tempNameFileNames,
948                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
949                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
950                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
951                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
952                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
953                         pDataArray.push_back(tempTrim);
954             
955                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
956                 }
957         
958         //parent do my part
959                 ofstream temp;
960                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
961                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
962                 if(qFileName != ""){
963                         m->openOutputFile(trimQualFileName, temp);              temp.close();
964                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
965                 }
966                 if (nameFile != "") {
967                         m->openOutputFile(trimNameFileName, temp);              temp.close();
968                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
969                 }
970         
971                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
972         processIDS.push_back(processors-1);
973
974         
975                 //Wait until all threads have terminated.
976                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
977                 
978                 //Close all thread handles and free memory allocations.
979                 for(int i=0; i < pDataArray.size(); i++){
980                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
981                 map<string, int>::iterator it2 = groupCounts.find(it->first);
982                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
983                 else { groupCounts[it->first] += it->second; }
984             }
985             CloseHandle(hThreadArray[i]);
986                         delete pDataArray[i];
987                 }
988         
989 #endif          
990         
991         
992         //append files
993                 for(int i=0;i<processIDS.size();i++){
994                         
995                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
996                         
997                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
998                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
999                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1000                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1001                         
1002                         if(qFileName != ""){
1003                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1004                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1005                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1006                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1007                         }
1008                         
1009                         if(nameFile != ""){
1010                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1011                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1012                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1013                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1014                         }
1015                         
1016                         if(createGroup){
1017                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1018                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1019                         }
1020                         
1021                         
1022                         if(allFiles){
1023                                 for(int j=0;j<fastaFileNames.size();j++){
1024                                         for(int k=0;k<fastaFileNames[j].size();k++){
1025                                                 if (fastaFileNames[j][k] != "") {
1026                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1027                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1028                                                         
1029                                                         if(qFileName != ""){
1030                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1031                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1032                                                         }
1033                                                         
1034                                                         if(nameFile != ""){
1035                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1036                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1037                                                         }
1038                                                 }
1039                                         }
1040                                 }
1041                         }
1042                         
1043             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1044                         if(createGroup){
1045                                 ifstream in;
1046                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1047                                 m->openInputFile(tempFile, in);
1048                                 int tempNum;
1049                                 string group;
1050                                 
1051                                 in >> tempNum; m->gobble(in);
1052                                 
1053                                 if (tempNum != 0) {
1054                                         while (!in.eof()) { 
1055                                                 in >> group >> tempNum; m->gobble(in);
1056                         
1057                                                 map<string, int>::iterator it = groupCounts.find(group);
1058                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1059                                                 else { groupCounts[it->first] += tempNum; }
1060                                         }
1061                                 }
1062                                 in.close(); m->mothurRemove(tempFile);
1063                         }
1064             #endif
1065                 }
1066
1067         return exitCommand;
1068         }
1069         catch(exception& e) {
1070                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1071                 exit(1);
1072         }
1073 }
1074
1075 /**************************************************************************************************/
1076
1077 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1078         try {
1079         
1080         vector<unsigned long long> fastaFilePos;
1081                 vector<unsigned long long> qfileFilePos;
1082                 
1083                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1084                 //set file positions for fasta file
1085                 fastaFilePos = m->divideFile(filename, processors);
1086                 
1087                 //get name of first sequence in each chunk
1088                 map<string, int> firstSeqNames;
1089                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1090                         ifstream in;
1091                         m->openInputFile(filename, in);
1092                         in.seekg(fastaFilePos[i]);
1093                 
1094                         Sequence temp(in); 
1095                         firstSeqNames[temp.getName()] = i;
1096                 
1097                         in.close();
1098                 }
1099                 
1100                 if(qfilename != "")     {
1101             //seach for filePos of each first name in the qfile and save in qfileFilePos
1102             ifstream inQual;
1103             m->openInputFile(qfilename, inQual);
1104             
1105             string input;
1106             while(!inQual.eof()){       
1107                 input = m->getline(inQual);
1108                 
1109                 if (input.length() != 0) {
1110                     if(input[0] == '>'){ //this is a sequence name line
1111                         istringstream nameStream(input);
1112                         
1113                         string sname = "";  nameStream >> sname;
1114                         sname = sname.substr(1);
1115                         
1116                         map<string, int>::iterator it = firstSeqNames.find(sname);
1117                         
1118                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1119                             unsigned long long pos = inQual.tellg(); 
1120                             qfileFilePos.push_back(pos - input.length() - 1);   
1121                             firstSeqNames.erase(it);
1122                         }
1123                     }
1124                 }
1125                 
1126                 if (firstSeqNames.size() == 0) { break; }
1127             }
1128             inQual.close();
1129             
1130             
1131             if (firstSeqNames.size() != 0) { 
1132                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1133                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1134                 }
1135                 qFileName = "";
1136                 return processors;
1137             }
1138             
1139             //get last file position of qfile
1140             FILE * pFile;
1141             unsigned long long size;
1142             
1143             //get num bytes in file
1144             pFile = fopen (qfilename.c_str(),"rb");
1145             if (pFile==NULL) perror ("Error opening file");
1146             else{
1147                 fseek (pFile, 0, SEEK_END);
1148                 size=ftell (pFile);
1149                 fclose (pFile);
1150             }
1151             
1152             qfileFilePos.push_back(size);
1153         }
1154         
1155         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1156                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1157                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1158                 }       
1159                 if(qfilename == "")     {       qLines = lines; } //files with duds
1160                 
1161                 return processors;
1162                 
1163                 #else
1164             
1165         if (processors == 1) { //save time
1166                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1167                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1168             lines.push_back(linePair(0, 1000));
1169             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1170         }else{
1171             int numFastaSeqs = 0;
1172             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1173             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1174         
1175             if (qfilename != "") { 
1176                 int numQualSeqs = 0;
1177                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1178                 
1179                 if (numFastaSeqs != numQualSeqs) {
1180                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1181                 }
1182             }
1183         
1184             //figure out how many sequences you have to process
1185             int numSeqsPerProcessor = numFastaSeqs / processors;
1186             for (int i = 0; i < processors; i++) {
1187                 int startIndex =  i * numSeqsPerProcessor;
1188                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1189                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1190                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1191                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1192             }
1193         }
1194             if(qfilename == "") {       qLines = lines; } //files with duds
1195                         return 1;
1196                 
1197                 #endif
1198         }
1199         catch(exception& e) {
1200                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1201                 exit(1);
1202         }
1203 }
1204
1205 //***************************************************************************************************************
1206
1207 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1208         try {
1209                 ifstream inOligos;
1210                 m->openInputFile(oligoFile, inOligos);
1211                 
1212                 ofstream test;
1213                 
1214                 string type, oligo, group;
1215
1216                 int indexPrimer = 0;
1217                 int indexBarcode = 0;
1218                 
1219                 while(!inOligos.eof()){
1220
1221                         inOligos >> type; 
1222                                         
1223                         if(type[0] == '#'){
1224                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1225                                 m->gobble(inOligos);
1226                         }
1227                         else{
1228                                 m->gobble(inOligos);
1229                                 //make type case insensitive
1230                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1231                                 
1232                                 inOligos >> oligo;
1233                                 
1234                                 for(int i=0;i<oligo.length();i++){
1235                                         oligo[i] = toupper(oligo[i]);
1236                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1237                                 }
1238                                 
1239                                 if(type == "FORWARD"){
1240                                         group = "";
1241                                         
1242                                         // get rest of line in case there is a primer name
1243                                         while (!inOligos.eof()) {       
1244                                                 char c = inOligos.get(); 
1245                                                 if (c == 10 || c == 13){        break;  }
1246                                                 else if (c == 32 || c == 9){;} //space or tab
1247                                                 else {  group += c;  }
1248                                         } 
1249                                         
1250                                         //check for repeat barcodes
1251                                         map<string, int>::iterator itPrime = primers.find(oligo);
1252                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1253                                         
1254                                         primers[oligo]=indexPrimer; indexPrimer++;              
1255                                         primerNameVector.push_back(group);
1256                                 }
1257                                 else if(type == "REVERSE"){
1258                                         //Sequence oligoRC("reverse", oligo);
1259                                         //oligoRC.reverseComplement();
1260                     string oligoRC = reverseOligo(oligo);
1261                                         revPrimer.push_back(oligoRC);
1262                                 }
1263                                 else if(type == "BARCODE"){
1264                                         inOligos >> group;
1265                                         
1266                                         //check for repeat barcodes
1267                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1268                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1269                                                 
1270                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1271                                         barcodeNameVector.push_back(group);
1272                                 }else if(type == "LINKER"){
1273                                         linker.push_back(oligo);
1274                                 }else if(type == "SPACER"){
1275                                         spacer.push_back(oligo);
1276                                 }
1277                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1278                         }
1279                         m->gobble(inOligos);
1280                 }       
1281                 inOligos.close();
1282                 
1283                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1284                 
1285                 //add in potential combos
1286                 if(barcodeNameVector.size() == 0){
1287                         barcodes[""] = 0;
1288                         barcodeNameVector.push_back("");                        
1289                 }
1290                 
1291                 if(primerNameVector.size() == 0){
1292                         primers[""] = 0;
1293                         primerNameVector.push_back("");                 
1294                 }
1295                 
1296                 fastaFileNames.resize(barcodeNameVector.size());
1297                 for(int i=0;i<fastaFileNames.size();i++){
1298                         fastaFileNames[i].assign(primerNameVector.size(), "");
1299                 }
1300                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1301                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1302                 
1303                 if(allFiles){
1304                         set<string> uniqueNames; //used to cleanup outputFileNames
1305                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1306                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1307                                         
1308                                         string primerName = primerNameVector[itPrimer->second];
1309                                         string barcodeName = barcodeNameVector[itBar->second];
1310                                         
1311                                         string comboGroupName = "";
1312                                         string fastaFileName = "";
1313                                         string qualFileName = "";
1314                                         string nameFileName = "";
1315                                         
1316                                         if(primerName == ""){
1317                                                 comboGroupName = barcodeNameVector[itBar->second];
1318                                         }
1319                                         else{
1320                                                 if(barcodeName == ""){
1321                                                         comboGroupName = primerNameVector[itPrimer->second];
1322                                                 }
1323                                                 else{
1324                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1325                                                 }
1326                                         }
1327                                         
1328                                         
1329                                         ofstream temp;
1330                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1331                                         if (uniqueNames.count(fastaFileName) == 0) {
1332                                                 outputNames.push_back(fastaFileName);
1333                                                 outputTypes["fasta"].push_back(fastaFileName);
1334                                                 uniqueNames.insert(fastaFileName);
1335                                         }
1336                                         
1337                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1338                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1339                                         
1340                                         if(qFileName != ""){
1341                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1342                                                 if (uniqueNames.count(qualFileName) == 0) {
1343                                                         outputNames.push_back(qualFileName);
1344                                                         outputTypes["qfile"].push_back(qualFileName);
1345                                                 }
1346                                                 
1347                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1348                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1349                                         }
1350                                         
1351                                         if(nameFile != ""){
1352                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1353                                                 if (uniqueNames.count(nameFileName) == 0) {
1354                                                         outputNames.push_back(nameFileName);
1355                                                         outputTypes["name"].push_back(nameFileName);
1356                                                 }
1357                                                 
1358                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1359                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1360                                         }
1361                                         
1362                                 }
1363                         }
1364                 }
1365                 numFPrimers = primers.size();
1366                 numRPrimers = revPrimer.size();
1367         numLinkers = linker.size();
1368         numSpacers = spacer.size();
1369                 
1370                 bool allBlank = true;
1371                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1372                         if (barcodeNameVector[i] != "") {
1373                                 allBlank = false;
1374                                 break;
1375                         }
1376                 }
1377                 for (int i = 0; i < primerNameVector.size(); i++) {
1378                         if (primerNameVector[i] != "") {
1379                                 allBlank = false;
1380                                 break;
1381                         }
1382                 }
1383                 
1384                 if (allBlank) {
1385                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1386                         allFiles = false;
1387                         return false;
1388                 }
1389                 
1390                 return true;
1391                 
1392         }
1393         catch(exception& e) {
1394                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1395                 exit(1);
1396         }
1397 }
1398 //***************************************************************************************************************
1399
1400 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1401         try {
1402                 bool success = 1;
1403                 if(qscores.getName() != ""){
1404                         qscores.trimQScores(-1, keepFirst);
1405                 }
1406                 sequence.trim(keepFirst);
1407                 return success;
1408         }
1409         catch(exception& e) {
1410                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1411                 exit(1);
1412         }
1413         
1414 }       
1415
1416 //***************************************************************************************************************
1417
1418 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1419         try {
1420                 bool success = 0;
1421                 
1422                 int length = sequence.getNumBases() - removeLast;
1423                 
1424                 if(length > 0){
1425                         if(qscores.getName() != ""){
1426                                 qscores.trimQScores(-1, length);
1427                         }
1428                         sequence.trim(length);
1429                         success = 1;
1430                 }
1431                 else{
1432                         success = 0;
1433                 }
1434
1435                 return success;
1436         }
1437         catch(exception& e) {
1438                 m->errorOut(e, "removeLastTrim", "countDiffs");
1439                 exit(1);
1440         }
1441         
1442 }       
1443
1444 //***************************************************************************************************************
1445
1446 bool TrimSeqsCommand::cullLength(Sequence& seq){
1447         try {
1448         
1449                 int length = seq.getNumBases();
1450                 bool success = 0;       //guilty until proven innocent
1451                 
1452                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1453                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1454                 else                                                                                            {       success = 0;    }
1455                 
1456                 return success;
1457         
1458         }
1459         catch(exception& e) {
1460                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1461                 exit(1);
1462         }
1463         
1464 }
1465
1466 //***************************************************************************************************************
1467
1468 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1469         try {
1470                 int longHomoP = seq.getLongHomoPolymer();
1471                 bool success = 0;       //guilty until proven innocent
1472                 
1473                 if(longHomoP <= maxHomoP){      success = 1;    }
1474                 else                                    {       success = 0;    }
1475                 
1476                 return success;
1477         }
1478         catch(exception& e) {
1479                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1480                 exit(1);
1481         }
1482         
1483 }
1484 //********************************************************************/
1485 string TrimSeqsCommand::reverseOligo(string oligo){
1486         try {
1487         string reverse = "";
1488         
1489         for(int i=oligo.length()-1;i>=0;i--){
1490             
1491             if(oligo[i] == 'A')         {       reverse += 'T'; }
1492             else if(oligo[i] == 'T'){   reverse += 'A'; }
1493             else if(oligo[i] == 'U'){   reverse += 'A'; }
1494             
1495             else if(oligo[i] == 'G'){   reverse += 'C'; }
1496             else if(oligo[i] == 'C'){   reverse += 'G'; }
1497             
1498             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1499             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1500             
1501             else if(oligo[i] == 'M'){   reverse += 'K'; }
1502             else if(oligo[i] == 'K'){   reverse += 'M'; }
1503             
1504             else if(oligo[i] == 'W'){   reverse += 'W'; }
1505             else if(oligo[i] == 'S'){   reverse += 'S'; }
1506             
1507             else if(oligo[i] == 'B'){   reverse += 'V'; }
1508             else if(oligo[i] == 'V'){   reverse += 'B'; }
1509             
1510             else if(oligo[i] == 'D'){   reverse += 'H'; }
1511             else if(oligo[i] == 'H'){   reverse += 'D'; }
1512             
1513             else                                                {       reverse += 'N'; }
1514         }
1515         
1516         
1517         return reverse;
1518     }
1519         catch(exception& e) {
1520                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1521                 exit(1);
1522         }
1523 }
1524
1525 //***************************************************************************************************************
1526
1527 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1528         try {
1529                 int numNs = seq.getAmbigBases();
1530                 bool success = 0;       //guilty until proven innocent
1531                 
1532                 if(numNs <= maxAmbig)   {       success = 1;    }
1533                 else                                    {       success = 0;    }
1534                 
1535                 return success;
1536         }
1537         catch(exception& e) {
1538                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1539                 exit(1);
1540         }
1541         
1542 }
1543 //***************************************************************************************************************