]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added subsample and consensus parameters to unifrac.weighted command
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332         numSpacers = 0;
333         numLinkers = 0;
334                 createGroup = false;
335                 vector<vector<string> > fastaFileNames;
336                 vector<vector<string> > qualFileNames;
337                 vector<vector<string> > nameFileNames;
338                 
339                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
340                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
341                 
342                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
343                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
344                 
345                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
346                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
347                 
348                 if (qFileName != "") {
349                         outputNames.push_back(trimQualFile);
350                         outputNames.push_back(scrapQualFile);
351                         outputTypes["qfile"].push_back(trimQualFile);
352                         outputTypes["qfile"].push_back(scrapQualFile); 
353                 }
354                 
355                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
356                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
357                 
358                 if (nameFile != "") {
359                         m->readNames(nameFile, nameMap);
360                         outputNames.push_back(trimNameFile);
361                         outputNames.push_back(scrapNameFile);
362                         outputTypes["name"].push_back(trimNameFile);
363                         outputTypes["name"].push_back(scrapNameFile); 
364                 }
365                 
366                 if (m->control_pressed) { return 0; }
367                 
368                 string outputGroupFileName;
369                 if(oligoFile != ""){
370                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
371                         if (createGroup) {
372                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
373                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
374                         }
375                 }
376        
377         //fills lines and qlines
378                 setLines(fastaFile, qFileName);
379                 
380         if(processors == 1){
381             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
382         }else{
383             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
384         }       
385                 
386                 
387                 if (m->control_pressed) {  return 0; }                  
388         
389                 if(allFiles){
390                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
391                         map<string, string>::iterator it;
392                         set<string> namesToRemove;
393                         for(int i=0;i<fastaFileNames.size();i++){
394                                 for(int j=0;j<fastaFileNames[0].size();j++){
395                                         if (fastaFileNames[i][j] != "") {
396                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
397                                                         if(m->isBlank(fastaFileNames[i][j])){
398                                                                 m->mothurRemove(fastaFileNames[i][j]);
399                                                                 namesToRemove.insert(fastaFileNames[i][j]);
400                                                         
401                                                                 if(qFileName != ""){
402                                                                         m->mothurRemove(qualFileNames[i][j]);
403                                                                         namesToRemove.insert(qualFileNames[i][j]);
404                                                                 }
405                                                                 
406                                                                 if(nameFile != ""){
407                                                                         m->mothurRemove(nameFileNames[i][j]);
408                                                                         namesToRemove.insert(nameFileNames[i][j]);
409                                                                 }
410                                                         }else{  
411                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
412                                                                 if (it == uniqueFastaNames.end()) {     
413                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
414                                                                 }       
415                                                         }
416                                                 }
417                                         }
418                                 }
419                         }
420                         
421                         //remove names for outputFileNames, just cleans up the output
422                         vector<string> outputNames2;
423                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
424                         outputNames = outputNames2;
425                         
426                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
427                                 ifstream in;
428                                 m->openInputFile(it->first, in);
429                                 
430                                 ofstream out;
431                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
432                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
433                                 m->openOutputFile(thisGroupName, out);
434                                 
435                                 while (!in.eof()){
436                                         if (m->control_pressed) { break; }
437                                         
438                                         Sequence currSeq(in); m->gobble(in);
439                                         out << currSeq.getName() << '\t' << it->second << endl;
440                     
441                     if (nameFile != "") {
442                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
443                         if (itName != nameMap.end()) { 
444                             vector<string> thisSeqsNames; 
445                             m->splitAtChar(itName->second, thisSeqsNames, ',');
446                             for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
447                                 out << thisSeqsNames[k] << '\t' << it->second << endl;
448                             }
449                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
450                     }
451                                 }
452                                 in.close();
453                                 out.close();
454                         }
455                 }
456                 
457                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
458
459                 //output group counts
460                 m->mothurOutEndLine();
461                 int total = 0;
462                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
463                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
464                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
465                 }
466                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
467                 
468                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
469
470                 //set fasta file as new current fastafile
471                 string current = "";
472                 itTypes = outputTypes.find("fasta");
473                 if (itTypes != outputTypes.end()) {
474                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
475                 }
476                 
477                 itTypes = outputTypes.find("name");
478                 if (itTypes != outputTypes.end()) {
479                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
480                 }
481                 
482                 itTypes = outputTypes.find("qfile");
483                 if (itTypes != outputTypes.end()) {
484                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
485                 }
486                 
487                 itTypes = outputTypes.find("group");
488                 if (itTypes != outputTypes.end()) {
489                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
490                 }
491
492                 m->mothurOutEndLine();
493                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
494                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
495                 m->mothurOutEndLine();
496                 
497                 return 0;       
498                         
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "TrimSeqsCommand", "execute");
502                 exit(1);
503         }
504 }
505                 
506 /**************************************************************************************/
507
508 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
509                 
510         try {
511                 
512                 ofstream trimFASTAFile;
513                 m->openOutputFile(trimFileName, trimFASTAFile);
514                 
515                 ofstream scrapFASTAFile;
516                 m->openOutputFile(scrapFileName, scrapFASTAFile);
517                 
518                 ofstream trimQualFile;
519                 ofstream scrapQualFile;
520                 if(qFileName != ""){
521                         m->openOutputFile(trimQFileName, trimQualFile);
522                         m->openOutputFile(scrapQFileName, scrapQualFile);
523                 }
524                 
525                 ofstream trimNameFile;
526                 ofstream scrapNameFile;
527                 if(nameFile != ""){
528                         m->openOutputFile(trimNFileName, trimNameFile);
529                         m->openOutputFile(scrapNFileName, scrapNameFile);
530                 }
531                 
532                 
533                 ofstream outGroupsFile;
534                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
535                 if(allFiles){
536                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
537                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
538                                         if (fastaFileNames[i][j] != "") {
539                                                 ofstream temp;
540                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
541                                                 if(qFileName != ""){
542                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
543                                                 }
544                                                 
545                                                 if(nameFile != ""){
546                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
547                                                 }
548                                         }
549                                 }
550                         }
551                 }
552                 
553                 ifstream inFASTA;
554                 m->openInputFile(filename, inFASTA);
555                 inFASTA.seekg(line.start);
556                 
557                 ifstream qFile;
558                 if(qFileName != "")     {
559                         m->openInputFile(qFileName, qFile);
560                         qFile.seekg(qline.start);  
561                 }
562                 
563                 int count = 0;
564                 bool moreSeqs = 1;
565                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
566         
567                 while (moreSeqs) {
568                                 
569                         if (m->control_pressed) { 
570                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
571                                 if (createGroup) {       outGroupsFile.close();   }
572
573                                 if(qFileName != ""){
574                                         qFile.close();
575                                 }
576                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
577
578                                 return 0;
579                         }
580                         
581                         int success = 1;
582                         string trashCode = "";
583                         int currentSeqsDiffs = 0;
584
585                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
586                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
587                         QualityScores currQual;
588                         if(qFileName != ""){
589                                 currQual = QualityScores(qFile);  m->gobble(qFile);
590                         }
591                         
592                         string origSeq = currSeq.getUnaligned();
593                         if (origSeq != "") {
594                                 
595                                 int barcodeIndex = 0;
596                                 int primerIndex = 0;
597                                 
598                 if(numLinkers != 0){
599                                         success = trimOligos.stripLinker(currSeq, currQual);
600                                         if(success > ldiffs)            {       trashCode += 'k';       }
601                                         else{ currentSeqsDiffs += success;  }
602
603                                 }
604                 
605                                 if(barcodes.size() != 0){
606                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
607                                         if(success > bdiffs)            {       trashCode += 'b';       }
608                                         else{ currentSeqsDiffs += success;  }
609                                 }
610                                 
611                 if(numSpacers != 0){
612                                         success = trimOligos.stripSpacer(currSeq, currQual);
613                                         if(success > sdiffs)            {       trashCode += 's';       }
614                                         else{ currentSeqsDiffs += success;  }
615
616                                 }
617                 
618                                 if(numFPrimers != 0){
619                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
620                                         if(success > pdiffs)            {       trashCode += 'f';       }
621                                         else{ currentSeqsDiffs += success;  }
622                                 }
623                                 
624                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
625                                 
626                                 if(numRPrimers != 0){
627                                         success = trimOligos.stripReverse(currSeq, currQual);
628                                         if(!success)                            {       trashCode += 'r';       }
629                                 }
630
631                                 if(keepFirst != 0){
632                                         success = keepFirstTrim(currSeq, currQual);
633                                 }
634                                 
635                                 if(removeLast != 0){
636                                         success = removeLastTrim(currSeq, currQual);
637                                         if(!success)                            {       trashCode += 'l';       }
638                                 }
639
640                                 
641                                 if(qFileName != ""){
642                                         int origLength = currSeq.getNumBases();
643                                         
644                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
645                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
646                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
647                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
648                                         else                                            {       success = 1;                            }
649                                         
650                                         //you don't want to trim, if it fails above then scrap it
651                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
652                                         
653                                         if(!success)                            {       trashCode += 'q';       }
654                                 }                               
655                 
656                                 if(minLength > 0 || maxLength > 0){
657                                         success = cullLength(currSeq);
658                                         if(!success)                            {       trashCode += 'l';       }
659                                 }
660                                 if(maxHomoP > 0){
661                                         success = cullHomoP(currSeq);
662                                         if(!success)                            {       trashCode += 'h';       }
663                                 }
664                                 if(maxAmbig != -1){
665                                         success = cullAmbigs(currSeq);
666                                         if(!success)                            {       trashCode += 'n';       }
667                                 }
668                                 
669                                 if(flip){               // should go last                       
670                                         currSeq.reverseComplement();
671                                         if(qFileName != ""){
672                                                 currQual.flipQScores(); 
673                                         }
674                                 }
675                                 
676                                 if(trashCode.length() == 0){
677                                         currSeq.setAligned(currSeq.getUnaligned());
678                                         currSeq.printSequence(trimFASTAFile);
679                                         
680                                         if(qFileName != ""){
681                                                 currQual.printQScores(trimQualFile);
682                                         }
683                                         
684                                         if(nameFile != ""){
685                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
686                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
687                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
688                                         }
689                                         
690                                         if (createGroup) {
691                                                 if(barcodes.size() != 0){
692                                                         string thisGroup = barcodeNameVector[barcodeIndex];
693                                                         if (primers.size() != 0) { 
694                                                                 if (primerNameVector[primerIndex] != "") { 
695                                                                         if(thisGroup != "") {
696                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
697                                                                         }else {
698                                                                                 thisGroup = primerNameVector[primerIndex]; 
699                                                                         }
700                                                                 } 
701                                                         }
702                                                         
703                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
704                                                         
705                                                         if (nameFile != "") {
706                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
707                                                                 if (itName != nameMap.end()) { 
708                                                                         vector<string> thisSeqsNames; 
709                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
710                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
711                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
712                                                                         }
713                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
714                                                         }
715                                                         
716                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
717                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
718                                                         else { groupCounts[it->first]++; }
719                                                                 
720                                                 }
721                                         }
722                                         
723                                         if(allFiles){
724                                                 ofstream output;
725                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
726                                                 currSeq.printSequence(output);
727                                                 output.close();
728                                                 
729                                                 if(qFileName != ""){
730                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
731                                                         currQual.printQScores(output);
732                                                         output.close();                                                 
733                                                 }
734                                                 
735                                                 if(nameFile != ""){
736                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
737                                                         if (itName != nameMap.end()) { 
738                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
739                                                                 output << itName->first << '\t' << itName->second << endl; 
740                                                                 output.close();
741                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
742                                                 }
743                                         }
744                                 }
745                                 else{
746                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
747                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
748                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
749                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
750                                         }
751                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
752                                         currSeq.setUnaligned(origSeq);
753                                         currSeq.setAligned(origSeq);
754                                         currSeq.printSequence(scrapFASTAFile);
755                                         if(qFileName != ""){
756                                                 currQual.printQScores(scrapQualFile);
757                                         }
758                                 }
759                                 count++;
760                         }
761                         
762                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
763                                 unsigned long long pos = inFASTA.tellg();
764                                 if ((pos == -1) || (pos >= line.end)) { break; }
765                         
766                         #else
767                                 if (inFASTA.eof()) { break; }
768                         #endif
769                         
770                         //report progress
771                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
772                         
773                 }
774                 //report progress
775                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
776                 
777                 
778                 inFASTA.close();
779                 trimFASTAFile.close();
780                 scrapFASTAFile.close();
781                 if (createGroup) {       outGroupsFile.close();   }
782                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
783                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
784                 
785                 return count;
786         }
787         catch(exception& e) {
788                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
789                 exit(1);
790         }
791 }
792
793 /**************************************************************************************************/
794
795 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
796         try {
797         
798         int process = 1;
799                 int exitCommand = 1;
800                 processIDS.clear();
801                 
802 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
803                                 //loop through and create all the processes you want
804                 while (process != processors) {
805                         int pid = fork();
806                         
807                         if (pid > 0) {
808                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
809                                 process++;
810                         }else if (pid == 0){
811                                 
812                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
813                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
814                                 vector<vector<string> > tempNameFileNames = nameFileNames;
815
816                                 if(allFiles){
817                                         ofstream temp;
818
819                                         for(int i=0;i<tempFASTAFileNames.size();i++){
820                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
821                                                         if (tempFASTAFileNames[i][j] != "") {
822                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
823                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
824
825                                                                 if(qFileName != ""){
826                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
827                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
828                                                                 }
829                                                                 if(nameFile != ""){
830                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
831                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
832                                                                 }
833                                                         }
834                                                 }
835                                         }
836                                 }
837                                                         
838                                 driverCreateTrim(filename,
839                                                                  qFileName,
840                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
841                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
842                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
843                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
844                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
845                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
846                                                                  (groupFile + toString(getpid()) + ".temp"),
847                                                                  tempFASTAFileNames,
848                                                                  tempPrimerQualFileNames,
849                                                                  tempNameFileNames,
850                                                                  lines[process],
851                                                                  qLines[process]);
852                                 
853                                 //pass groupCounts to parent
854                                 if(createGroup){
855                                         ofstream out;
856                                         string tempFile = filename + toString(getpid()) + ".num.temp";
857                                         m->openOutputFile(tempFile, out);
858                                         
859                                         out << groupCounts.size() << endl;
860                                         
861                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
862                                                 out << it->first << '\t' << it->second << endl;
863                                         }
864                                         out.close();
865                                 }
866                                 exit(0);
867                         }else { 
868                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
869                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
870                                 exit(0);
871                         }
872                 }
873                 
874                 //parent do my part
875                 ofstream temp;
876                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
877                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
878                 if(qFileName != ""){
879                         m->openOutputFile(trimQualFileName, temp);              temp.close();
880                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
881                 }
882                 if (nameFile != "") {
883                         m->openOutputFile(trimNameFileName, temp);              temp.close();
884                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
885                 }
886
887                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
888                 
889                 //force parent to wait until all the processes are done
890                 for (int i=0;i<processIDS.size();i++) { 
891                         int temp = processIDS[i];
892                         wait(&temp);
893                 }
894 #else
895         //////////////////////////////////////////////////////////////////////////////////////////////////////
896                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
897                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
898                 //////////////////////////////////////////////////////////////////////////////////////////////////////
899                 
900                 vector<trimData*> pDataArray; 
901                 DWORD   dwThreadIdArray[processors-1];
902                 HANDLE  hThreadArray[processors-1]; 
903                 
904                 //Create processor worker threads.
905                 for( int i=0; i<processors-1; i++){
906                         
907             string extension = "";
908                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
909             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
910             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
911             vector<vector<string> > tempNameFileNames = nameFileNames;
912             
913             if(allFiles){
914                 ofstream temp;
915                 
916                 for(int i=0;i<tempFASTAFileNames.size();i++){
917                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
918                         if (tempFASTAFileNames[i][j] != "") {
919                             tempFASTAFileNames[i][j] += extension;
920                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
921                             
922                             if(qFileName != ""){
923                                 tempPrimerQualFileNames[i][j] += extension;
924                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
925                             }
926                             if(nameFile != ""){
927                                 tempNameFileNames[i][j] += extension;
928                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
929                             }
930                         }
931                     }
932                 }
933             }
934
935             
936                         trimData* tempTrim = new trimData(filename,
937                                               qFileName, nameFile,
938                                               (trimFASTAFileName+extension),
939                                               (scrapFASTAFileName+extension),
940                                               (trimQualFileName+extension),
941                                               (scrapQualFileName+extension),
942                                               (trimNameFileName+extension),
943                                               (scrapNameFileName+extension),
944                                               (groupFile+extension),
945                                               tempFASTAFileNames,
946                                               tempPrimerQualFileNames,
947                                               tempNameFileNames,
948                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
949                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
950                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
951                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
952                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
953                         pDataArray.push_back(tempTrim);
954             
955                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
956                 }
957         
958         //parent do my part
959                 ofstream temp;
960                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
961                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
962                 if(qFileName != ""){
963                         m->openOutputFile(trimQualFileName, temp);              temp.close();
964                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
965                 }
966                 if (nameFile != "") {
967                         m->openOutputFile(trimNameFileName, temp);              temp.close();
968                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
969                 }
970         
971                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
972         processIDS.push_back(processors-1);
973
974         
975                 //Wait until all threads have terminated.
976                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
977                 
978                 //Close all thread handles and free memory allocations.
979                 for(int i=0; i < pDataArray.size(); i++){
980                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
981                 map<string, int>::iterator it2 = groupCounts.find(it->first);
982                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
983                 else { groupCounts[it->first] += it->second; }
984             }
985             CloseHandle(hThreadArray[i]);
986                         delete pDataArray[i];
987                 }
988         
989 #endif          
990         
991         
992         //append files
993                 for(int i=0;i<processIDS.size();i++){
994                         
995                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
996                         
997                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
998                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
999                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1000                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1001                         
1002                         if(qFileName != ""){
1003                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1004                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1005                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1006                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1007                         }
1008                         
1009                         if(nameFile != ""){
1010                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1011                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1012                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1013                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1014                         }
1015                         
1016                         if(createGroup){
1017                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1018                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1019                         }
1020                         
1021                         
1022                         if(allFiles){
1023                                 for(int j=0;j<fastaFileNames.size();j++){
1024                                         for(int k=0;k<fastaFileNames[j].size();k++){
1025                                                 if (fastaFileNames[j][k] != "") {
1026                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1027                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1028                                                         
1029                                                         if(qFileName != ""){
1030                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1031                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1032                                                         }
1033                                                         
1034                                                         if(nameFile != ""){
1035                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1036                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1037                                                         }
1038                                                 }
1039                                         }
1040                                 }
1041                         }
1042                         
1043             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1044                         if(createGroup){
1045                                 ifstream in;
1046                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1047                                 m->openInputFile(tempFile, in);
1048                                 int tempNum;
1049                                 string group;
1050                                 
1051                                 in >> tempNum; m->gobble(in);
1052                                 
1053                                 if (tempNum != 0) {
1054                                         while (!in.eof()) { 
1055                                                 in >> group >> tempNum; m->gobble(in);
1056                         
1057                                                 map<string, int>::iterator it = groupCounts.find(group);
1058                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1059                                                 else { groupCounts[it->first] += tempNum; }
1060                                         }
1061                                 }
1062                                 in.close(); m->mothurRemove(tempFile);
1063                         }
1064             #endif
1065                 }
1066
1067         return exitCommand;
1068         }
1069         catch(exception& e) {
1070                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1071                 exit(1);
1072         }
1073 }
1074
1075 /**************************************************************************************************/
1076
1077 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1078         try {
1079         
1080         vector<unsigned long long> fastaFilePos;
1081                 vector<unsigned long long> qfileFilePos;
1082                 
1083                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1084                 //set file positions for fasta file
1085                 fastaFilePos = m->divideFile(filename, processors);
1086                 
1087                 //get name of first sequence in each chunk
1088                 map<string, int> firstSeqNames;
1089                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1090                         ifstream in;
1091                         m->openInputFile(filename, in);
1092                         in.seekg(fastaFilePos[i]);
1093                 
1094                         Sequence temp(in); 
1095                         firstSeqNames[temp.getName()] = i;
1096                 
1097                         in.close();
1098                 }
1099                 
1100                 if(qfilename != "")     {
1101             //seach for filePos of each first name in the qfile and save in qfileFilePos
1102             ifstream inQual;
1103             m->openInputFile(qfilename, inQual);
1104             
1105             string input;
1106             while(!inQual.eof()){       
1107                 input = m->getline(inQual);
1108                 
1109                 if (input.length() != 0) {
1110                     if(input[0] == '>'){ //this is a sequence name line
1111                         istringstream nameStream(input);
1112                         
1113                         string sname = "";  nameStream >> sname;
1114                         sname = sname.substr(1);
1115                         
1116                         map<string, int>::iterator it = firstSeqNames.find(sname);
1117                         
1118                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1119                             unsigned long long pos = inQual.tellg(); 
1120                             qfileFilePos.push_back(pos - input.length() - 1);   
1121                             firstSeqNames.erase(it);
1122                         }
1123                     }
1124                 }
1125                 
1126                 if (firstSeqNames.size() == 0) { break; }
1127             }
1128             inQual.close();
1129             
1130             
1131             if (firstSeqNames.size() != 0) { 
1132                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1133                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1134                 }
1135                 qFileName = "";
1136                 return processors;
1137             }
1138             
1139             //get last file position of qfile
1140             FILE * pFile;
1141             unsigned long long size;
1142             
1143             //get num bytes in file
1144             pFile = fopen (qfilename.c_str(),"rb");
1145             if (pFile==NULL) perror ("Error opening file");
1146             else{
1147                 fseek (pFile, 0, SEEK_END);
1148                 size=ftell (pFile);
1149                 fclose (pFile);
1150             }
1151             
1152             qfileFilePos.push_back(size);
1153         }
1154         
1155         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1156                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1157                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1158                 }       
1159                 if(qfilename == "")     {       qLines = lines; } //files with duds
1160                 
1161                 return processors;
1162                 
1163                 #else
1164             
1165         if (processors == 1) { //save time
1166                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1167                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1168             lines.push_back(linePair(0, 1000));
1169             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1170         }else{
1171             int numFastaSeqs = 0;
1172             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1173             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1174         
1175             if (qfilename != "") { 
1176                 int numQualSeqs = 0;
1177                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1178                 
1179                 if (numFastaSeqs != numQualSeqs) {
1180                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1181                 }
1182             }
1183         
1184             //figure out how many sequences you have to process
1185             int numSeqsPerProcessor = numFastaSeqs / processors;
1186             for (int i = 0; i < processors; i++) {
1187                 int startIndex =  i * numSeqsPerProcessor;
1188                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1189                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1190                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1191                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1192             }
1193         
1194             if(qfilename == "") {       qLines = lines; } //files with duds
1195         }
1196                         return 1;
1197                 
1198                 #endif
1199         }
1200         catch(exception& e) {
1201                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1202                 exit(1);
1203         }
1204 }
1205
1206 //***************************************************************************************************************
1207
1208 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1209         try {
1210                 ifstream inOligos;
1211                 m->openInputFile(oligoFile, inOligos);
1212                 
1213                 ofstream test;
1214                 
1215                 string type, oligo, group;
1216
1217                 int indexPrimer = 0;
1218                 int indexBarcode = 0;
1219                 
1220                 while(!inOligos.eof()){
1221
1222                         inOligos >> type; 
1223                                         
1224                         if(type[0] == '#'){
1225                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1226                                 m->gobble(inOligos);
1227                         }
1228                         else{
1229                                 m->gobble(inOligos);
1230                                 //make type case insensitive
1231                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1232                                 
1233                                 inOligos >> oligo;
1234                                 
1235                                 for(int i=0;i<oligo.length();i++){
1236                                         oligo[i] = toupper(oligo[i]);
1237                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1238                                 }
1239                                 
1240                                 if(type == "FORWARD"){
1241                                         group = "";
1242                                         
1243                                         // get rest of line in case there is a primer name
1244                                         while (!inOligos.eof()) {       
1245                                                 char c = inOligos.get(); 
1246                                                 if (c == 10 || c == 13){        break;  }
1247                                                 else if (c == 32 || c == 9){;} //space or tab
1248                                                 else {  group += c;  }
1249                                         } 
1250                                         
1251                                         //check for repeat barcodes
1252                                         map<string, int>::iterator itPrime = primers.find(oligo);
1253                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1254                                         
1255                                         primers[oligo]=indexPrimer; indexPrimer++;              
1256                                         primerNameVector.push_back(group);
1257                                 }
1258                                 else if(type == "REVERSE"){
1259                                         //Sequence oligoRC("reverse", oligo);
1260                                         //oligoRC.reverseComplement();
1261                     string oligoRC = reverseOligo(oligo);
1262                                         revPrimer.push_back(oligoRC);
1263                                 }
1264                                 else if(type == "BARCODE"){
1265                                         inOligos >> group;
1266                                         
1267                                         //check for repeat barcodes
1268                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1269                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1270                                                 
1271                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1272                                         barcodeNameVector.push_back(group);
1273                                 }else if(type == "LINKER"){
1274                                         linker.push_back(oligo);
1275                                 }else if(type == "SPACER"){
1276                                         spacer.push_back(oligo);
1277                                 }
1278                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1279                         }
1280                         m->gobble(inOligos);
1281                 }       
1282                 inOligos.close();
1283                 
1284                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1285                 
1286                 //add in potential combos
1287                 if(barcodeNameVector.size() == 0){
1288                         barcodes[""] = 0;
1289                         barcodeNameVector.push_back("");                        
1290                 }
1291                 
1292                 if(primerNameVector.size() == 0){
1293                         primers[""] = 0;
1294                         primerNameVector.push_back("");                 
1295                 }
1296                 
1297                 fastaFileNames.resize(barcodeNameVector.size());
1298                 for(int i=0;i<fastaFileNames.size();i++){
1299                         fastaFileNames[i].assign(primerNameVector.size(), "");
1300                 }
1301                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1302                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1303                 
1304                 if(allFiles){
1305                         set<string> uniqueNames; //used to cleanup outputFileNames
1306                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1307                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1308                                         
1309                                         string primerName = primerNameVector[itPrimer->second];
1310                                         string barcodeName = barcodeNameVector[itBar->second];
1311                                         
1312                                         string comboGroupName = "";
1313                                         string fastaFileName = "";
1314                                         string qualFileName = "";
1315                                         string nameFileName = "";
1316                                         
1317                                         if(primerName == ""){
1318                                                 comboGroupName = barcodeNameVector[itBar->second];
1319                                         }
1320                                         else{
1321                                                 if(barcodeName == ""){
1322                                                         comboGroupName = primerNameVector[itPrimer->second];
1323                                                 }
1324                                                 else{
1325                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1326                                                 }
1327                                         }
1328                                         
1329                                         
1330                                         ofstream temp;
1331                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1332                                         if (uniqueNames.count(fastaFileName) == 0) {
1333                                                 outputNames.push_back(fastaFileName);
1334                                                 outputTypes["fasta"].push_back(fastaFileName);
1335                                                 uniqueNames.insert(fastaFileName);
1336                                         }
1337                                         
1338                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1339                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1340                                         
1341                                         if(qFileName != ""){
1342                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1343                                                 if (uniqueNames.count(qualFileName) == 0) {
1344                                                         outputNames.push_back(qualFileName);
1345                                                         outputTypes["qfile"].push_back(qualFileName);
1346                                                 }
1347                                                 
1348                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1349                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1350                                         }
1351                                         
1352                                         if(nameFile != ""){
1353                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1354                                                 if (uniqueNames.count(nameFileName) == 0) {
1355                                                         outputNames.push_back(nameFileName);
1356                                                         outputTypes["name"].push_back(nameFileName);
1357                                                 }
1358                                                 
1359                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1360                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1361                                         }
1362                                         
1363                                 }
1364                         }
1365                 }
1366                 numFPrimers = primers.size();
1367                 numRPrimers = revPrimer.size();
1368         numLinkers = linker.size();
1369         numSpacers = spacer.size();
1370                 
1371                 bool allBlank = true;
1372                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1373                         if (barcodeNameVector[i] != "") {
1374                                 allBlank = false;
1375                                 break;
1376                         }
1377                 }
1378                 for (int i = 0; i < primerNameVector.size(); i++) {
1379                         if (primerNameVector[i] != "") {
1380                                 allBlank = false;
1381                                 break;
1382                         }
1383                 }
1384                 
1385                 if (allBlank) {
1386                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1387                         allFiles = false;
1388                         return false;
1389                 }
1390                 
1391                 return true;
1392                 
1393         }
1394         catch(exception& e) {
1395                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1396                 exit(1);
1397         }
1398 }
1399 //***************************************************************************************************************
1400
1401 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1402         try {
1403                 bool success = 1;
1404                 if(qscores.getName() != ""){
1405                         qscores.trimQScores(-1, keepFirst);
1406                 }
1407                 sequence.trim(keepFirst);
1408                 return success;
1409         }
1410         catch(exception& e) {
1411                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1412                 exit(1);
1413         }
1414         
1415 }       
1416
1417 //***************************************************************************************************************
1418
1419 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1420         try {
1421                 bool success = 0;
1422                 
1423                 int length = sequence.getNumBases() - removeLast;
1424                 
1425                 if(length > 0){
1426                         if(qscores.getName() != ""){
1427                                 qscores.trimQScores(-1, length);
1428                         }
1429                         sequence.trim(length);
1430                         success = 1;
1431                 }
1432                 else{
1433                         success = 0;
1434                 }
1435
1436                 return success;
1437         }
1438         catch(exception& e) {
1439                 m->errorOut(e, "removeLastTrim", "countDiffs");
1440                 exit(1);
1441         }
1442         
1443 }       
1444
1445 //***************************************************************************************************************
1446
1447 bool TrimSeqsCommand::cullLength(Sequence& seq){
1448         try {
1449         
1450                 int length = seq.getNumBases();
1451                 bool success = 0;       //guilty until proven innocent
1452                 
1453                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1454                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1455                 else                                                                                            {       success = 0;    }
1456                 
1457                 return success;
1458         
1459         }
1460         catch(exception& e) {
1461                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1462                 exit(1);
1463         }
1464         
1465 }
1466
1467 //***************************************************************************************************************
1468
1469 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1470         try {
1471                 int longHomoP = seq.getLongHomoPolymer();
1472                 bool success = 0;       //guilty until proven innocent
1473                 
1474                 if(longHomoP <= maxHomoP){      success = 1;    }
1475                 else                                    {       success = 0;    }
1476                 
1477                 return success;
1478         }
1479         catch(exception& e) {
1480                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1481                 exit(1);
1482         }
1483         
1484 }
1485 //********************************************************************/
1486 string TrimSeqsCommand::reverseOligo(string oligo){
1487         try {
1488         string reverse = "";
1489         
1490         for(int i=oligo.length()-1;i>=0;i--){
1491             
1492             if(oligo[i] == 'A')         {       reverse += 'T'; }
1493             else if(oligo[i] == 'T'){   reverse += 'A'; }
1494             else if(oligo[i] == 'U'){   reverse += 'A'; }
1495             
1496             else if(oligo[i] == 'G'){   reverse += 'C'; }
1497             else if(oligo[i] == 'C'){   reverse += 'G'; }
1498             
1499             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1500             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1501             
1502             else if(oligo[i] == 'M'){   reverse += 'K'; }
1503             else if(oligo[i] == 'K'){   reverse += 'M'; }
1504             
1505             else if(oligo[i] == 'W'){   reverse += 'W'; }
1506             else if(oligo[i] == 'S'){   reverse += 'S'; }
1507             
1508             else if(oligo[i] == 'B'){   reverse += 'V'; }
1509             else if(oligo[i] == 'V'){   reverse += 'B'; }
1510             
1511             else if(oligo[i] == 'D'){   reverse += 'H'; }
1512             else if(oligo[i] == 'H'){   reverse += 'D'; }
1513             
1514             else                                                {       reverse += 'N'; }
1515         }
1516         
1517         
1518         return reverse;
1519     }
1520         catch(exception& e) {
1521                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1522                 exit(1);
1523         }
1524 }
1525
1526 //***************************************************************************************************************
1527
1528 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1529         try {
1530                 int numNs = seq.getAmbigBases();
1531                 bool success = 0;       //guilty until proven innocent
1532                 
1533                 if(numNs <= maxAmbig)   {       success = 1;    }
1534                 else                                    {       success = 0;    }
1535                 
1536                 return success;
1537         }
1538         catch(exception& e) {
1539                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1540                 exit(1);
1541         }
1542         
1543 }
1544 //***************************************************************************************************************