]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
working on chimera.uchime change for dereplicate=t bug. added shared file to get...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
48                         
49                 vector<string> myArray;
50                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){        
60         try {
61                 string helpString = "";
62                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65                 helpString += "The fasta parameter is required.\n";
66                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67         helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80                 helpString += "The qfile parameter allows you to provide a quality file.\n";
81                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92                 helpString += "The trim.seqs command should be in the following format: \n";
93                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
94                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
95                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
98                 return helpString;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
107     try {
108         string pattern = "";
109         
110         if (type == "qfile") {  pattern = "[filename],[tag],qual"; } 
111         else if (type == "fasta") {  pattern = "[filename],[tag],fasta"; } 
112         else if (type == "group") {  pattern = "[filename],groups"; }
113         else if (type == "name") {  pattern = "[filename],[tag],names"; }
114         else if (type == "count") {  pattern = "[filename],[tag],count_table-[filename],count_table"; }
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125
126 TrimSeqsCommand::TrimSeqsCommand(){     
127         try {
128                 abort = true; calledHelp = true;
129                 setParameters();
130                 vector<string> tempOutNames;
131                 outputTypes["fasta"] = tempOutNames;
132                 outputTypes["qfile"] = tempOutNames;
133                 outputTypes["group"] = tempOutNames;
134                 outputTypes["name"] = tempOutNames;
135         outputTypes["count"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174             outputTypes["count"] = tempOutNames;
175                         
176                         //if the user changes the input directory command factory will send this info to us in the output parameter 
177                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
178                         if (inputDir == "not found"){   inputDir = "";          }
179                         else {
180                                 string path;
181                                 it = parameters.find("fasta");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
187                                 }
188                                 
189                                 it = parameters.find("oligos");
190                                 //user has given a template file
191                                 if(it != parameters.end()){ 
192                                         path = m->hasPath(it->second);
193                                         //if the user has not given a path then, add inputdir. else leave path alone.
194                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
195                                 }
196                                 
197                                 it = parameters.find("qfile");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
203                                 }
204                                 
205                                 it = parameters.find("name");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
211                                 }
212                 
213                 it = parameters.find("count");
214                                 //user has given a template file
215                                 if(it != parameters.end()){ 
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
219                                 }
220                                 
221                         }
222
223                         
224                         //check for required parameters
225                         fastaFile = validParameter.validFile(parameters, "fasta", true);
226                         if (fastaFile == "not found") {                                 
227                                 fastaFile = m->getFastaFile(); 
228                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230                         }else if (fastaFile == "not open") { abort = true; }    
231                         else { m->setFastaFile(fastaFile); }
232                         
233                         //if the user changes the output directory command factory will send this info to us in the output parameter 
234                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
235                                 outputDir = ""; 
236                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
237                         }
238                 
239                         
240                         //check for optional parameter and set defaults
241                         // ...at some point should added some additional type checking...
242                         string temp;
243                         temp = validParameter.validFile(parameters, "flip", false);
244                         if (temp == "not found")    {   flip = 0;       }
245                         else {  flip = m->isTrue(temp);         }
246                 
247                         temp = validParameter.validFile(parameters, "oligos", true);
248                         if (temp == "not found"){       oligoFile = "";         }
249                         else if(temp == "not open"){    abort = true;   } 
250                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
251                         
252                         
253                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
254                         m->mothurConvert(temp, maxAmbig);  
255
256                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, maxHomoP);  
258
259                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, minLength); 
261                         
262                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, maxLength);
264                         
265                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, bdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
269                         m->mothurConvert(temp, pdiffs);
270             
271             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
272                         m->mothurConvert(temp, ldiffs);
273             
274             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
275                         m->mothurConvert(temp, sdiffs);
276                         
277                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
278                         m->mothurConvert(temp, tdiffs);
279                         
280                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
281                         
282                         temp = validParameter.validFile(parameters, "qfile", true);     
283                         if (temp == "not found")        {       qFileName = "";         }
284                         else if(temp == "not open")     {       abort = true;           }
285                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
286                         
287                         temp = validParameter.validFile(parameters, "name", true);      
288                         if (temp == "not found")        {       nameFile = "";          }
289                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
290                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
291             
292             countfile = validParameter.validFile(parameters, "count", true);
293                         if (countfile == "not open") { abort = true; countfile = ""; }  
294                         else if (countfile == "not found") { countfile = ""; }
295                         else { m->setCountTableFile(countfile); }
296                         
297             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
298                         
299                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
300                         m->mothurConvert(temp, qThreshold);
301                         
302                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
303                         qtrim = m->isTrue(temp);
304
305                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
306                         convert(temp, qRollAverage);
307
308                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309                         convert(temp, qWindowAverage);
310
311                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
312                         convert(temp, qWindowSize);
313
314                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
315                         convert(temp, qWindowStep);
316
317                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
318                         convert(temp, qAverage);
319
320                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
321                         convert(temp, keepFirst);
322
323                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
324                         convert(temp, removeLast);
325                         
326                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
327                         allFiles = m->isTrue(temp);
328             
329             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
330                         keepforward = m->isTrue(temp);
331             
332             temp = validParameter.validFile(parameters, "checkorient", false);          if (temp == "not found") { temp = "F"; }
333                         reorient = m->isTrue(temp);
334                         
335                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
336                         m->setProcessors(temp);
337                         m->mothurConvert(temp, processors); 
338                         
339                         
340                         if(allFiles && (oligoFile == "")){
341                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
342                         }
343                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
345                                 qAverage=0;
346                                 qThreshold=0;
347                         }
348                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
349                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
350                                 abort = true;
351                         }
352                         
353             if (countfile == "") {
354                 if (nameFile == "") {
355                     vector<string> files; files.push_back(fastaFile);
356                     parser.getNameFile(files);
357                 }
358             }
359                 }
360
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
364                 exit(1);
365         }
366 }
367 //***************************************************************************************************************
368
369 int TrimSeqsCommand::execute(){
370         try{
371         
372                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
373                 
374         pairedOligos = false;
375                 numFPrimers = 0;  //this needs to be initialized
376                 numRPrimers = 0;
377         numSpacers = 0;
378         numLinkers = 0;
379                 createGroup = false;
380                 vector<vector<string> > fastaFileNames;
381                 vector<vector<string> > qualFileNames;
382                 vector<vector<string> > nameFileNames;
383                 
384         map<string, string> variables; 
385                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386         variables["[tag]"] = "trim";
387                 string trimSeqFile = getOutputFileName("fasta",variables);
388         string trimQualFile = getOutputFileName("qfile",variables);
389                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
390         
391         variables["[tag]"] = "scrap";
392                 string scrapSeqFile = getOutputFileName("fasta",variables);
393         string scrapQualFile = getOutputFileName("qfile",variables);
394                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
395                 
396                 if (qFileName != "") {
397                         outputNames.push_back(trimQualFile);
398                         outputNames.push_back(scrapQualFile);
399                         outputTypes["qfile"].push_back(trimQualFile);
400                         outputTypes["qfile"].push_back(scrapQualFile); 
401                 }
402                 
403         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404         variables["[tag]"] = "trim";
405                 string trimNameFile = getOutputFileName("name",variables);
406         variables["[tag]"] = "scrap";
407                 string scrapNameFile = getOutputFileName("name",variables);
408                 
409                 if (nameFile != "") {
410                         m->readNames(nameFile, nameMap);
411                         outputNames.push_back(trimNameFile);
412                         outputNames.push_back(scrapNameFile);
413                         outputTypes["name"].push_back(trimNameFile);
414                         outputTypes["name"].push_back(scrapNameFile); 
415                 }
416         
417         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418         variables["[tag]"] = "trim";
419         string trimCountFile = getOutputFileName("count",variables);
420         variables["[tag]"] = "scrap";
421                 string scrapCountFile = getOutputFileName("count",variables);
422                 
423                 if (countfile != "") {
424             CountTable ct;
425             ct.readTable(countfile);
426             nameCount = ct.getNameMap();
427                         outputNames.push_back(trimCountFile);
428                         outputNames.push_back(scrapCountFile);
429                         outputTypes["count"].push_back(trimCountFile);
430                         outputTypes["count"].push_back(scrapCountFile); 
431                 }
432
433                 
434                 if (m->control_pressed) { return 0; }
435                 
436                 string outputGroupFileName;
437                 if(oligoFile != ""){
438                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439                         if ((createGroup) && (countfile == "")){
440                 map<string, string> myvariables; 
441                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442                                 outputGroupFileName = getOutputFileName("group",myvariables);
443                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
444                         }
445                 }
446        
447         if (m->control_pressed) { return 0; }
448             
449         //fills lines and qlines
450                 setLines(fastaFile, qFileName);
451                 
452         if(processors == 1){
453             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
454         }else{
455             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
456         }       
457                 
458                 
459                 if (m->control_pressed) {  return 0; }                  
460         
461                 if(allFiles){
462                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
463                         map<string, string>::iterator it;
464                         set<string> namesToRemove;
465                         for(int i=0;i<fastaFileNames.size();i++){
466                                 for(int j=0;j<fastaFileNames[0].size();j++){
467                                         if (fastaFileNames[i][j] != "") {
468                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
469                                                         if(m->isBlank(fastaFileNames[i][j])){
470                                                                 m->mothurRemove(fastaFileNames[i][j]);
471                                                                 namesToRemove.insert(fastaFileNames[i][j]);
472                                                         
473                                                                 if(qFileName != ""){
474                                                                         m->mothurRemove(qualFileNames[i][j]);
475                                                                         namesToRemove.insert(qualFileNames[i][j]);
476                                                                 }
477                                                                 
478                                                                 if(nameFile != ""){
479                                                                         m->mothurRemove(nameFileNames[i][j]);
480                                                                         namesToRemove.insert(nameFileNames[i][j]);
481                                                                 }
482                                                         }else{  
483                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
484                                                                 if (it == uniqueFastaNames.end()) {     
485                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
486                                                                 }       
487                                                         }
488                                                 }
489                                         }
490                                 }
491                         }
492                         
493                         //remove names for outputFileNames, just cleans up the output
494                         vector<string> outputNames2;
495                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
496                         outputNames = outputNames2;
497                         
498             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
499                 ifstream in;
500                 m->openInputFile(it->first, in);
501                 
502                 ofstream out;
503                 map<string, string> myvariables; 
504                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
505                 string thisGroupName = "";
506                 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
507                 else {  thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
508                 m->openOutputFile(thisGroupName, out);
509                 
510                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
511                 
512                 while (!in.eof()){
513                     if (m->control_pressed) { break; }
514                     
515                     Sequence currSeq(in); m->gobble(in);
516                     if (countfile == "") {  
517                         out << currSeq.getName() << '\t' << it->second << endl;  
518                         
519                         if (nameFile != "") {
520                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
521                             if (itName != nameMap.end()) { 
522                                 vector<string> thisSeqsNames; 
523                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
524                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
525                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
526                                 }
527                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
528                         }
529                     }else { 
530                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
531                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
532                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
533                     }
534                 }
535                 in.close();
536                 out.close();
537             }
538             
539             if (countfile != "") { //create countfile with group info included
540                 CountTable* ct = new CountTable();
541                 ct->readTable(trimCountFile);
542                 map<string, int> justTrimmedNames = ct->getNameMap();
543                 delete ct;
544                 
545                 CountTable newCt;
546                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
547                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
548                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
549                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
550                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
551                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
552                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
553                 }
554                 newCt.printTable(trimCountFile);
555             }
556                 }
557                 
558                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
559
560                 //output group counts
561                 m->mothurOutEndLine();
562                 int total = 0;
563                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
564                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
565                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
566                 }
567                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
568                 
569                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
570
571                 //set fasta file as new current fastafile
572                 string current = "";
573                 itTypes = outputTypes.find("fasta");
574                 if (itTypes != outputTypes.end()) {
575                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
576                 }
577                 
578                 itTypes = outputTypes.find("name");
579                 if (itTypes != outputTypes.end()) {
580                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
581                 }
582                 
583                 itTypes = outputTypes.find("qfile");
584                 if (itTypes != outputTypes.end()) {
585                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
586                 }
587                 
588                 itTypes = outputTypes.find("group");
589                 if (itTypes != outputTypes.end()) {
590                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
591                 }
592         
593         itTypes = outputTypes.find("count");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
596                 }
597
598                 m->mothurOutEndLine();
599                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
600                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
601                 m->mothurOutEndLine();
602                 
603                 return 0;       
604                         
605         }
606         catch(exception& e) {
607                 m->errorOut(e, "TrimSeqsCommand", "execute");
608                 exit(1);
609         }
610 }
611                 
612 /**************************************************************************************/
613 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
614                 
615         try {
616                 
617                 ofstream trimFASTAFile;
618                 m->openOutputFile(trimFileName, trimFASTAFile);
619                 
620                 ofstream scrapFASTAFile;
621                 m->openOutputFile(scrapFileName, scrapFASTAFile);
622                 
623                 ofstream trimQualFile;
624                 ofstream scrapQualFile;
625                 if(qFileName != ""){
626                         m->openOutputFile(trimQFileName, trimQualFile);
627                         m->openOutputFile(scrapQFileName, scrapQualFile);
628                 }
629                 
630                 ofstream trimNameFile;
631                 ofstream scrapNameFile;
632                 if(nameFile != ""){
633                         m->openOutputFile(trimNFileName, trimNameFile);
634                         m->openOutputFile(scrapNFileName, scrapNameFile);
635                 }
636                 
637         ofstream trimCountFile;
638                 ofstream scrapCountFile;
639                 if(countfile != ""){
640                         m->openOutputFile(trimCFileName, trimCountFile);
641                         m->openOutputFile(scrapCFileName, scrapCountFile);
642             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
643                 }
644                 
645                 ofstream outGroupsFile;
646                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
647                 if(allFiles){
648                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
649                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
650                                         if (fastaFileNames[i][j] != "") {
651                                                 ofstream temp;
652                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
653                                                 if(qFileName != ""){
654                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
655                                                 }
656                                                 
657                                                 if(nameFile != ""){
658                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
659                                                 }
660                                         }
661                                 }
662                         }
663                 }
664                 
665                 ifstream inFASTA;
666                 m->openInputFile(filename, inFASTA);
667                 inFASTA.seekg(line.start);
668                 
669                 ifstream qFile;
670                 if(qFileName != "")     {
671                         m->openInputFile(qFileName, qFile);
672                         qFile.seekg(qline.start);  
673                 }
674                 
675                 int count = 0;
676                 bool moreSeqs = 1;
677         int numBarcodes = barcodes.size();
678                 TrimOligos* trimOligos = NULL;
679         if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
680         else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
681         
682         TrimOligos* rtrimOligos = NULL;
683         if (reorient) {
684             //create reoriented primer and barcode pairs
685             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
686             for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
687                   oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
688                 rpairedPrimers[it->first] = tempPair;
689             }
690             for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
691                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
692                 rpairedBarcodes[it->first] = tempPair;
693             }
694             rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
695         }
696         
697                 while (moreSeqs) {
698                                 
699                         if (m->control_pressed) {
700                 delete trimOligos; if (reorient) { delete rtrimOligos; }
701                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
702                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
703                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
704                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
705                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
706                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
707                         }
708                         
709                         int success = 1;
710                         string trashCode = "";
711                         int currentSeqsDiffs = 0;
712
713                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
714                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
715             
716                         QualityScores currQual;
717                         if(qFileName != ""){
718                                 currQual = QualityScores(qFile);  m->gobble(qFile);
719                 //cout << currQual.getName() << endl;
720                         }
721                         
722                         string origSeq = currSeq.getUnaligned();
723                         if (origSeq != "") {
724                                 
725                                 int barcodeIndex = 0;
726                                 int primerIndex = 0;
727                                 
728                 if(numLinkers != 0){
729                                         success = trimOligos->stripLinker(currSeq, currQual);
730                                         if(success > ldiffs)            {       trashCode += 'k';       }
731                                         else{ currentSeqsDiffs += success;  }
732
733                                 }
734                 
735                                 if(numBarcodes != 0){
736                                         success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
737                                         if(success > bdiffs)            {       trashCode += 'b';       }
738                                         else{ currentSeqsDiffs += success;  }
739                                 }
740                                 
741                 if(numSpacers != 0){
742                                         success = trimOligos->stripSpacer(currSeq, currQual);
743                                         if(success > sdiffs)            {       trashCode += 's';       }
744                                         else{ currentSeqsDiffs += success;  }
745
746                                 }
747                 
748                                 if(numFPrimers != 0){
749                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
750                                         if(success > pdiffs)            {       trashCode += 'f';       }
751                                         else{ currentSeqsDiffs += success;  }
752                                 }
753                                 
754                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
755                                 
756                                 if(numRPrimers != 0){
757                                         success = trimOligos->stripReverse(currSeq, currQual);
758                                         if(!success)                            {       trashCode += 'r';       }
759                                 }
760                 
761                 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
762                     int thisSuccess = 0;
763                     string thisTrashCode = "";
764                     int thisCurrentSeqsDiffs = 0;
765                     
766                     int thisBarcodeIndex = 0;
767                     int thisPrimerIndex = 0;
768                     
769                     if(numBarcodes != 0){
770                         thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
771                         if(thisSuccess > bdiffs)                {       thisTrashCode += 'b';   }
772                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
773                     }
774                     
775                     if(numFPrimers != 0){
776                         thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
777                         if(thisSuccess > pdiffs)                {       thisTrashCode += 'f';   }
778                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
779                     }
780                     
781                     if (thisCurrentSeqsDiffs > tdiffs)  {       thisTrashCode += 't';   }
782                     
783                     if (thisTrashCode == "") { 
784                         trashCode = thisTrashCode;
785                         success = thisSuccess;
786                         currentSeqsDiffs = thisCurrentSeqsDiffs;
787                         barcodeIndex = thisBarcodeIndex;
788                         primerIndex = thisPrimerIndex;
789                         currSeq.reverseComplement();
790                         if(qFileName != ""){
791                             currQual.flipQScores();
792                         }
793                     }
794                 }
795                 
796                                 if(keepFirst != 0){
797                                         success = keepFirstTrim(currSeq, currQual);
798                                 }
799                                 
800                                 if(removeLast != 0){
801                                         success = removeLastTrim(currSeq, currQual);
802                                         if(!success)                            {       trashCode += 'l';       }
803                                 }
804
805                                 
806                                 if(qFileName != ""){
807                                         int origLength = currSeq.getNumBases();
808                                         
809                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
810                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
811                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
812                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
813                                         else                                            {       success = 1;                            }
814                                         
815                                         //you don't want to trim, if it fails above then scrap it
816                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
817                                         
818                                         if(!success)                            {       trashCode += 'q';       }
819                                 }                               
820                 
821                                 if(minLength > 0 || maxLength > 0){
822                                         success = cullLength(currSeq);
823                                         if(!success)                            {       trashCode += 'l';       }
824                                 }
825                                 if(maxHomoP > 0){
826                                         success = cullHomoP(currSeq);
827                                         if(!success)                            {       trashCode += 'h';       }
828                                 }
829                                 if(maxAmbig != -1){
830                                         success = cullAmbigs(currSeq);
831                                         if(!success)                            {       trashCode += 'n';       }
832                                 }
833                                 
834                                 if(flip){               // should go last                       
835                                         currSeq.reverseComplement();
836                                         if(qFileName != ""){
837                                                 currQual.flipQScores(); 
838                                         }
839                                 }
840                                 
841                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
842                 
843                                 if(trashCode.length() == 0){
844                     string thisGroup = "";
845                     if (createGroup) {
846                                                 if(numBarcodes != 0){
847                                                         thisGroup = barcodeNameVector[barcodeIndex];
848                                                         if (numFPrimers != 0) {
849                                                                 if (primerNameVector[primerIndex] != "") { 
850                                                                         if(thisGroup != "") {
851                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
852                                                                         }else {
853                                                                                 thisGroup = primerNameVector[primerIndex]; 
854                                                                         }
855                                                                 } 
856                                                         }
857                         }
858                     }
859                     
860                     int pos = thisGroup.find("ignore");
861                     if (pos == string::npos) {
862                         currSeq.setAligned(currSeq.getUnaligned());
863                         currSeq.printSequence(trimFASTAFile);
864                         
865                         if(qFileName != ""){
866                             currQual.printQScores(trimQualFile);
867                         }
868                         
869                         
870                         if(nameFile != ""){
871                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
872                             if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
873                             else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
874                         }
875                         
876                         int numRedundants = 0;
877                         if (countfile != "") {
878                             map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
879                             if (itCount != nameCount.end()) { 
880                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
881                                 numRedundants = itCount->second-1;
882                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
883                         }
884                         
885                         if (createGroup) {
886                             if(numBarcodes != 0){
887                                                                 
888                                 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
889                                 
890                                 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
891                                 else {   groupMap[currSeq.getName()] = thisGroup; }
892                                 
893                                 if (nameFile != "") {
894                                     map<string, string>::iterator itName = nameMap.find(currSeq.getName());
895                                     if (itName != nameMap.end()) { 
896                                         vector<string> thisSeqsNames; 
897                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
898                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
899                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
900                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
901                                         }
902                                     }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
903                                 }
904                                 
905                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
906                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
907                                 else { groupCounts[it->first] += (1 + numRedundants); }
908                                                                 
909                             }
910                         }
911                         
912                         if(allFiles){
913                             ofstream output;
914                             m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
915                             currSeq.printSequence(output);
916                             output.close();
917                             
918                             if(qFileName != ""){
919                                 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
920                                 currQual.printQScores(output);
921                                 output.close();                                                 
922                             }
923                             
924                             if(nameFile != ""){
925                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
926                                 if (itName != nameMap.end()) { 
927                                     m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
928                                     output << itName->first << '\t' << itName->second << endl; 
929                                     output.close();
930                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
931                             }
932                         }
933                     }
934                                 }
935                                 else{
936                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
937                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
938                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
939                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
940                                         }
941                     if (countfile != "") {
942                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
943                         if (itCount != nameCount.end()) { 
944                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
945                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
946                     }
947                     
948                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
949                                         currSeq.setUnaligned(origSeq);
950                                         currSeq.setAligned(origSeq);
951                                         currSeq.printSequence(scrapFASTAFile);
952                                         if(qFileName != ""){
953                                                 currQual.printQScores(scrapQualFile);
954                                         }
955                                 }
956                                 count++;
957                         }
958                         
959                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
960                                 unsigned long long pos = inFASTA.tellg();
961                                 if ((pos == -1) || (pos >= line.end)) { break; }
962                         
963                         #else
964                                 if (inFASTA.eof()) { break; }
965                         #endif
966                         
967                         //report progress
968                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
969                         
970                 }
971                 //report progress
972                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
973                 
974                 delete trimOligos;
975         if (reorient) { delete rtrimOligos; }
976                 inFASTA.close();
977                 trimFASTAFile.close();
978                 scrapFASTAFile.close();
979                 if (createGroup) {       outGroupsFile.close();   }
980                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
981                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
982         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
983                 
984                 return count;
985         }
986         catch(exception& e) {
987                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
988                 exit(1);
989         }
990 }
991
992 /**************************************************************************************************/
993
994 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
995         try {
996         
997         int process = 1;
998                 int exitCommand = 1;
999                 processIDS.clear();
1000                 
1001 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1002                                 //loop through and create all the processes you want
1003                 while (process != processors) {
1004                         int pid = fork();
1005                         
1006                         if (pid > 0) {
1007                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1008                                 process++;
1009                         }else if (pid == 0){
1010                                 
1011                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1012                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1013                                 vector<vector<string> > tempNameFileNames = nameFileNames;
1014
1015                                 if(allFiles){
1016                                         ofstream temp;
1017
1018                                         for(int i=0;i<tempFASTAFileNames.size();i++){
1019                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1020                                                         if (tempFASTAFileNames[i][j] != "") {
1021                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1022                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1023
1024                                                                 if(qFileName != ""){
1025                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1026                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1027                                                                 }
1028                                                                 if(nameFile != ""){
1029                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1030                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1031                                                                 }
1032                                                         }
1033                                                 }
1034                                         }
1035                                 }
1036                                                         
1037                                 driverCreateTrim(filename,
1038                                                                  qFileName,
1039                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
1040                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
1041                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
1042                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
1043                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
1044                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
1045                                  (trimCountFileName + toString(getpid()) + ".temp"),
1046                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
1047                                                                  (groupFile + toString(getpid()) + ".temp"),
1048                                                                  tempFASTAFileNames,
1049                                                                  tempPrimerQualFileNames,
1050                                                                  tempNameFileNames,
1051                                                                  lines[process],
1052                                                                  qLines[process]);
1053                 
1054                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1055                                 
1056                                 //pass groupCounts to parent
1057                                 if(createGroup){
1058                                         ofstream out;
1059                                         string tempFile = filename + toString(getpid()) + ".num.temp";
1060                                         m->openOutputFile(tempFile, out);
1061                                         
1062                                         out << groupCounts.size() << endl;
1063                                         
1064                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1065                                                 out << it->first << '\t' << it->second << endl;
1066                                         }
1067                     
1068                     out << groupMap.size() << endl;
1069                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1070                                                 out << it->first << '\t' << it->second << endl;
1071                                         }
1072                                         out.close();
1073                                 }
1074                                 exit(0);
1075                         }else { 
1076                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1077                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1078                                 exit(0);
1079                         }
1080                 }
1081                 
1082                 //parent do my part
1083                 ofstream temp;
1084                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1085                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1086                 if(qFileName != ""){
1087                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1088                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1089                 }
1090                 if (nameFile != "") {
1091                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1092                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1093                 }
1094         if (countfile != "") {
1095                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1096                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1097                 }
1098
1099                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1100                 
1101                 //force parent to wait until all the processes are done
1102                 for (int i=0;i<processIDS.size();i++) { 
1103                         int temp = processIDS[i];
1104                         wait(&temp);
1105                 }
1106 #else
1107         //////////////////////////////////////////////////////////////////////////////////////////////////////
1108                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1109                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1110                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1111                 
1112                 vector<trimData*> pDataArray; 
1113                 DWORD   dwThreadIdArray[processors-1];
1114                 HANDLE  hThreadArray[processors-1]; 
1115                 
1116                 //Create processor worker threads.
1117                 for( int h=0; h<processors-1; h++){
1118                         
1119             string extension = "";
1120                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1121             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1122             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1123             vector<vector<string> > tempNameFileNames = nameFileNames;
1124             
1125             if(allFiles){
1126                 ofstream temp;
1127                 
1128                 for(int i=0;i<tempFASTAFileNames.size();i++){
1129                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1130                         if (tempFASTAFileNames[i][j] != "") {
1131                             tempFASTAFileNames[i][j] += extension;
1132                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1133                             
1134                             if(qFileName != ""){
1135                                 tempPrimerQualFileNames[i][j] += extension;
1136                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1137                             }
1138                             if(nameFile != ""){
1139                                 tempNameFileNames[i][j] += extension;
1140                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1141                             }
1142                         }
1143                     }
1144                 }
1145             }
1146
1147             
1148                         trimData* tempTrim = new trimData(filename,
1149                                               qFileName, nameFile, countfile,
1150                                               (trimFASTAFileName+extension),
1151                                               (scrapFASTAFileName+extension),
1152                                               (trimQualFileName+extension),
1153                                               (scrapQualFileName+extension),
1154                                               (trimNameFileName+extension),
1155                                               (scrapNameFileName+extension),
1156                                               (trimCountFileName+extension),
1157                                               (scrapCountFileName+extension),
1158                                               (groupFile+extension),
1159                                               tempFASTAFileNames,
1160                                               tempPrimerQualFileNames,
1161                                               tempNameFileNames,
1162                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1163                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1164                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1165                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1166                                              minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1167                         pDataArray.push_back(tempTrim);
1168             
1169                         hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
1170                 }
1171         
1172         //parent do my part
1173                 ofstream temp;
1174                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1175                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1176                 if(qFileName != ""){
1177                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1178                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1179                 }
1180                 if (nameFile != "") {
1181                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1182                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1183                 }
1184         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1185         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1186         vector<vector<string> > tempNameFileNames = nameFileNames;
1187         if(allFiles){
1188             ofstream temp;
1189             string extension = toString(processors-1) + ".temp";
1190             for(int i=0;i<tempFASTAFileNames.size();i++){
1191                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1192                     if (tempFASTAFileNames[i][j] != "") {
1193                         tempFASTAFileNames[i][j] += extension;
1194                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1195                         
1196                         if(qFileName != ""){
1197                             tempPrimerQualFileNames[i][j] += extension;
1198                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
1199                         }
1200                         if(nameFile != ""){
1201                             tempNameFileNames[i][j] += extension;
1202                             m->openOutputFile(tempNameFileNames[i][j], temp);           temp.close();
1203                         }
1204                     }
1205                 }
1206             }
1207         }
1208         
1209                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1210         processIDS.push_back(processors-1);
1211
1212         
1213                 //Wait until all threads have terminated.
1214                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1215                 
1216                 //Close all thread handles and free memory allocations.
1217                 for(int i=0; i < pDataArray.size(); i++){
1218             if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1219                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1220             }
1221                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1222                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1223                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1224                 else { groupCounts[it->first] += it->second; }
1225             }
1226             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1227                 map<string, string>::iterator it2 = groupMap.find(it->first);
1228                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1229                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1230             }
1231             CloseHandle(hThreadArray[i]);
1232                         delete pDataArray[i];
1233                 }
1234         
1235 #endif          
1236         
1237         
1238         //append files
1239                 for(int i=0;i<processIDS.size();i++){
1240                         
1241                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1242                         
1243                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1244                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1245                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1246                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1247                         
1248                         if(qFileName != ""){
1249                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1250                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1251                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1252                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1253                         }
1254                         
1255                         if(nameFile != ""){
1256                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1257                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1258                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1259                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1260                         }
1261             
1262             if(countfile != ""){
1263                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1264                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1265                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1266                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1267                         }
1268                         
1269                         if((createGroup)&&(countfile == "")){
1270                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1271                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1272                         }
1273                         
1274                         
1275                         if(allFiles){
1276                                 for(int j=0;j<fastaFileNames.size();j++){
1277                                         for(int k=0;k<fastaFileNames[j].size();k++){
1278                                                 if (fastaFileNames[j][k] != "") {
1279                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1280                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1281                                                         
1282                                                         if(qFileName != ""){
1283                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1284                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1285                                                         }
1286                                                         
1287                                                         if(nameFile != ""){
1288                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1289                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1290                                                         }
1291                                                 }
1292                                         }
1293                                 }
1294                         }
1295                         
1296             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1297                         if(createGroup){
1298                                 ifstream in;
1299                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1300                                 m->openInputFile(tempFile, in);
1301                                 int tempNum;
1302                                 string group;
1303                                 
1304                                 in >> tempNum; m->gobble(in);
1305                                 
1306                                 if (tempNum != 0) {
1307                                         for (int i = 0; i < tempNum; i++) { 
1308                         int groupNum;
1309                                                 in >> group >> groupNum; m->gobble(in);
1310                         
1311                                                 map<string, int>::iterator it = groupCounts.find(group);
1312                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1313                                                 else { groupCounts[it->first] += groupNum; }
1314                                         }
1315                                 }
1316                 in >> tempNum; m->gobble(in);
1317                 if (tempNum != 0) {
1318                                         for (int i = 0; i < tempNum; i++) { 
1319                         string group, seqName;
1320                                                 in >> seqName >> group; m->gobble(in);
1321                         
1322                                                 map<string, string>::iterator it = groupMap.find(seqName);
1323                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1324                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1325                                         }
1326                                 }
1327                 
1328                                 in.close(); m->mothurRemove(tempFile);
1329                         }
1330             #endif
1331                 }
1332
1333         return exitCommand;
1334         }
1335         catch(exception& e) {
1336                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1337                 exit(1);
1338         }
1339 }
1340
1341 /**************************************************************************************************/
1342
1343 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1344         try {
1345         
1346         vector<unsigned long long> fastaFilePos;
1347                 vector<unsigned long long> qfileFilePos;
1348                 
1349                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1350                 //set file positions for fasta file
1351                 fastaFilePos = m->divideFile(filename, processors);
1352                 
1353                 //get name of first sequence in each chunk
1354                 map<string, int> firstSeqNames;
1355                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1356                         ifstream in;
1357                         m->openInputFile(filename, in);
1358                         in.seekg(fastaFilePos[i]);
1359                 
1360                         Sequence temp(in); 
1361                         firstSeqNames[temp.getName()] = i;
1362                 
1363                         in.close();
1364                 }
1365                 
1366                 if(qfilename != "")     {
1367             //seach for filePos of each first name in the qfile and save in qfileFilePos
1368             ifstream inQual;
1369             m->openInputFile(qfilename, inQual);
1370             
1371             string input;
1372             while(!inQual.eof()){       
1373                 input = m->getline(inQual);
1374                 
1375                 if (input.length() != 0) {
1376                     if(input[0] == '>'){ //this is a sequence name line
1377                         istringstream nameStream(input);
1378                         
1379                         string sname = "";  nameStream >> sname;
1380                         sname = sname.substr(1);
1381                         
1382                         for (int i = 0; i < sname.length(); i++) {
1383                             if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
1384                         }
1385                         
1386                         map<string, int>::iterator it = firstSeqNames.find(sname);
1387                         
1388                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1389                             unsigned long long pos = inQual.tellg(); 
1390                             qfileFilePos.push_back(pos - input.length() - 1);   
1391                             firstSeqNames.erase(it);
1392                         }
1393                     }
1394                 }
1395                 
1396                 if (firstSeqNames.size() == 0) { break; }
1397             }
1398             inQual.close();
1399             
1400             
1401             if (firstSeqNames.size() != 0) { 
1402                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1403                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1404                 }
1405                 qFileName = "";
1406                 return processors;
1407             }
1408             
1409             //get last file position of qfile
1410             FILE * pFile;
1411             unsigned long long size;
1412             
1413             //get num bytes in file
1414             pFile = fopen (qfilename.c_str(),"rb");
1415             if (pFile==NULL) perror ("Error opening file");
1416             else{
1417                 fseek (pFile, 0, SEEK_END);
1418                 size=ftell (pFile);
1419                 fclose (pFile);
1420             }
1421             
1422             qfileFilePos.push_back(size);
1423         }
1424         
1425         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1426             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1427                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1428                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1429                 }       
1430                 if(qfilename == "")     {       qLines = lines; } //files with duds
1431                 
1432                 return processors;
1433                 
1434                 #else
1435             
1436         if (processors == 1) { //save time
1437                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1438                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1439             lines.push_back(linePair(0, 1000));
1440             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1441         }else{
1442             int numFastaSeqs = 0;
1443             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1444             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1445         
1446             if (qfilename != "") { 
1447                 int numQualSeqs = 0;
1448                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1449                 
1450                 if (numFastaSeqs != numQualSeqs) {
1451                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1452                 }
1453             }
1454         
1455             //figure out how many sequences you have to process
1456             int numSeqsPerProcessor = numFastaSeqs / processors;
1457             for (int i = 0; i < processors; i++) {
1458                 int startIndex =  i * numSeqsPerProcessor;
1459                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1460                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1461                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1462             }
1463         }
1464             if(qfilename == "") {       qLines = lines; } //files with duds
1465                         return 1;
1466                 
1467                 #endif
1468         }
1469         catch(exception& e) {
1470                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1471                 exit(1);
1472         }
1473 }
1474
1475 //***************************************************************************************************************
1476
1477 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1478         try {
1479                 ifstream inOligos;
1480                 m->openInputFile(oligoFile, inOligos);
1481                 
1482                 ofstream test;
1483                 
1484                 string type, oligo, roligo, group;
1485         bool hasPrimer = false; bool hasPairedBarcodes = false;
1486
1487                 int indexPrimer = 0;
1488                 int indexBarcode = 0;
1489         int indexPairedPrimer = 0;
1490                 int indexPairedBarcode = 0;
1491         set<string> uniquePrimers;
1492         set<string> uniqueBarcodes;
1493                 
1494                 while(!inOligos.eof()){
1495
1496                         inOligos >> type; 
1497             
1498                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1499             
1500                         if(type[0] == '#'){
1501                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1502                                 m->gobble(inOligos);
1503                         }
1504                         else{
1505                                 m->gobble(inOligos);
1506                                 //make type case insensitive
1507                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1508                                 
1509                                 inOligos >> oligo;
1510                 
1511                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1512                                 
1513                                 for(int i=0;i<oligo.length();i++){
1514                                         oligo[i] = toupper(oligo[i]);
1515                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1516                                 }
1517                                 
1518                                 if(type == "FORWARD"){
1519                                         group = "";
1520                                         
1521                                         // get rest of line in case there is a primer name
1522                                         while (!inOligos.eof()) {       
1523                                                 char c = inOligos.get(); 
1524                                                 if (c == 10 || c == 13){        break;  }
1525                                                 else if (c == 32 || c == 9){;} //space or tab
1526                                                 else {  group += c;  }
1527                                         } 
1528                                         
1529                                         //check for repeat barcodes
1530                                         map<string, int>::iterator itPrime = primers.find(oligo);
1531                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1532                                         
1533                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1534                     
1535                                         primers[oligo]=indexPrimer; indexPrimer++;              
1536                                         primerNameVector.push_back(group);
1537                                 }
1538                 else if (type == "PRIMER"){
1539                     m->gobble(inOligos);
1540                                         
1541                     inOligos >> roligo;
1542                     
1543                     for(int i=0;i<roligo.length();i++){
1544                         roligo[i] = toupper(roligo[i]);
1545                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1546                     }
1547                     roligo = reverseOligo(roligo);
1548                     
1549                     group = "";
1550                     
1551                                         // get rest of line in case there is a primer name
1552                                         while (!inOligos.eof()) {
1553                                                 char c = inOligos.get();
1554                                                 if (c == 10 || c == 13){        break;  }
1555                                                 else if (c == 32 || c == 9){;} //space or tab
1556                                                 else {  group += c;  }
1557                                         }
1558                     
1559                     oligosPair newPrimer(oligo, roligo);
1560                     
1561                      if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
1562                                         
1563                                         //check for repeat barcodes
1564                     string tempPair = oligo+roligo;
1565                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1566                     else { uniquePrimers.insert(tempPair); }
1567                                         
1568                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1569                     
1570                                         pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1571                                         primerNameVector.push_back(group);
1572                     hasPrimer = true;
1573                 }
1574                                 else if(type == "REVERSE"){
1575                                         //Sequence oligoRC("reverse", oligo);
1576                                         //oligoRC.reverseComplement();
1577                     string oligoRC = reverseOligo(oligo);
1578                                         revPrimer.push_back(oligoRC);
1579                                 }
1580                                 else if(type == "BARCODE"){
1581                                         inOligos >> group;
1582                     
1583                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1584                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1585                     
1586                     string temp = "";
1587                     while (!inOligos.eof())     {
1588                                                 char c = inOligos.get();
1589                                                 if (c == 10 || c == 13){        break;  }
1590                                                 else if (c == 32 || c == 9){;} //space or tab
1591                                                 else {  temp += c;  }
1592                                         }
1593                                         
1594                     //then this is illumina data with 4 columns
1595                     if (temp != "") {
1596                         hasPairedBarcodes = true;
1597                         string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1598                         group = temp;
1599                         
1600                         for(int i=0;i<reverseBarcode.length();i++){
1601                             reverseBarcode[i] = toupper(reverseBarcode[i]);
1602                             if(reverseBarcode[i] == 'U')        {       reverseBarcode[i] = 'T';        }
1603                         }
1604                         
1605                         reverseBarcode = reverseOligo(reverseBarcode);
1606                         oligosPair newPair(oligo, reverseBarcode);
1607                         
1608                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1609                         
1610                         //check for repeat barcodes
1611                         string tempPair = oligo+reverseBarcode;
1612                         if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1613                         else { uniqueBarcodes.insert(tempPair); }
1614                         
1615                         pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1616                         barcodeNameVector.push_back(group);
1617                     }else {                             
1618                         //check for repeat barcodes
1619                         map<string, int>::iterator itBar = barcodes.find(oligo);
1620                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1621                         
1622                         barcodes[oligo]=indexBarcode; indexBarcode++;
1623                         barcodeNameVector.push_back(group);
1624                     }
1625                                 }else if(type == "LINKER"){
1626                                         linker.push_back(oligo);
1627                                 }else if(type == "SPACER"){
1628                                         spacer.push_back(oligo);
1629                                 }
1630                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1631                         }
1632                         m->gobble(inOligos);
1633                 }       
1634                 inOligos.close();
1635                 
1636         if (hasPairedBarcodes || hasPrimer) {
1637             pairedOligos = true;
1638             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
1639         }
1640         
1641                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1642                 
1643                 //add in potential combos
1644                 if(barcodeNameVector.size() == 0){
1645                         barcodes[""] = 0;
1646                         barcodeNameVector.push_back("");                        
1647                 }
1648                 
1649                 if(primerNameVector.size() == 0){
1650                         primers[""] = 0;
1651                         primerNameVector.push_back("");                 
1652                 }
1653                 
1654                 fastaFileNames.resize(barcodeNameVector.size());
1655                 for(int i=0;i<fastaFileNames.size();i++){
1656                         fastaFileNames[i].assign(primerNameVector.size(), "");
1657                 }
1658                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1659                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1660                 
1661                 if(allFiles){
1662                         set<string> uniqueNames; //used to cleanup outputFileNames
1663             if (pairedOligos) {
1664                 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1665                     for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1666                         
1667                         string primerName = primerNameVector[itPrimer->first];
1668                         string barcodeName = barcodeNameVector[itBar->first];
1669                         
1670                         if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1671                         else {
1672                             string comboGroupName = "";
1673                             string fastaFileName = "";
1674                             string qualFileName = "";
1675                             string nameFileName = "";
1676                             string countFileName = "";
1677                             
1678                             if(primerName == ""){
1679                                 comboGroupName = barcodeNameVector[itBar->first];
1680                             }
1681                             else{
1682                                 if(barcodeName == ""){
1683                                     comboGroupName = primerNameVector[itPrimer->first];
1684                                 }
1685                                 else{
1686                                     comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1687                                 }
1688                             }
1689                             
1690                             
1691                             ofstream temp;
1692                             map<string, string> variables;
1693                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1694                             variables["[tag]"] = comboGroupName;
1695                             fastaFileName = getOutputFileName("fasta", variables);
1696                             if (uniqueNames.count(fastaFileName) == 0) {
1697                                 outputNames.push_back(fastaFileName);
1698                                 outputTypes["fasta"].push_back(fastaFileName);
1699                                 uniqueNames.insert(fastaFileName);
1700                             }
1701                             
1702                             fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1703                             m->openOutputFile(fastaFileName, temp);             temp.close();
1704                             
1705                             if(qFileName != ""){
1706                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1707                                 qualFileName = getOutputFileName("qfile", variables);
1708                                 if (uniqueNames.count(qualFileName) == 0) {
1709                                     outputNames.push_back(qualFileName);
1710                                     outputTypes["qfile"].push_back(qualFileName);
1711                                 }
1712                                 
1713                                 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1714                                 m->openOutputFile(qualFileName, temp);          temp.close();
1715                             }
1716                             
1717                             if(nameFile != ""){
1718                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1719                                 nameFileName = getOutputFileName("name", variables);
1720                                 if (uniqueNames.count(nameFileName) == 0) {
1721                                     outputNames.push_back(nameFileName);
1722                                     outputTypes["name"].push_back(nameFileName);
1723                                 }
1724                                 
1725                                 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1726                                 m->openOutputFile(nameFileName, temp);          temp.close();
1727                             }
1728                         }
1729                     }
1730                 }
1731             }else {
1732                 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1733                     for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1734                         
1735                         string primerName = primerNameVector[itPrimer->second];
1736                         string barcodeName = barcodeNameVector[itBar->second];
1737                         
1738                         if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
1739                         else {
1740                             string comboGroupName = "";
1741                             string fastaFileName = "";
1742                             string qualFileName = "";
1743                             string nameFileName = "";
1744                             string countFileName = "";
1745                             
1746                             if(primerName == ""){
1747                                 comboGroupName = barcodeNameVector[itBar->second];
1748                             }
1749                             else{
1750                                 if(barcodeName == ""){
1751                                     comboGroupName = primerNameVector[itPrimer->second];
1752                                 }
1753                                 else{
1754                                     comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1755                                 }
1756                             }
1757                             
1758                             
1759                             ofstream temp;
1760                             map<string, string> variables; 
1761                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1762                             variables["[tag]"] = comboGroupName;
1763                             fastaFileName = getOutputFileName("fasta", variables);
1764                             if (uniqueNames.count(fastaFileName) == 0) {
1765                                 outputNames.push_back(fastaFileName);
1766                                 outputTypes["fasta"].push_back(fastaFileName);
1767                                 uniqueNames.insert(fastaFileName);
1768                             }
1769                             
1770                             fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1771                             m->openOutputFile(fastaFileName, temp);             temp.close();
1772                             
1773                             if(qFileName != ""){
1774                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1775                                 qualFileName = getOutputFileName("qfile", variables);
1776                                 if (uniqueNames.count(qualFileName) == 0) {
1777                                     outputNames.push_back(qualFileName);
1778                                     outputTypes["qfile"].push_back(qualFileName);
1779                                 }
1780                                 
1781                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1782                                 m->openOutputFile(qualFileName, temp);          temp.close();
1783                             }
1784                             
1785                             if(nameFile != ""){
1786                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1787                                 nameFileName = getOutputFileName("name", variables);
1788                                 if (uniqueNames.count(nameFileName) == 0) {
1789                                     outputNames.push_back(nameFileName);
1790                                     outputTypes["name"].push_back(nameFileName);
1791                                 }
1792                                 
1793                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1794                                 m->openOutputFile(nameFileName, temp);          temp.close();
1795                             }
1796                         }
1797                     }
1798                 }
1799             }
1800                 }
1801                 numFPrimers = primers.size();
1802         if (pairedOligos) { numFPrimers  = pairedPrimers.size(); }
1803                 numRPrimers = revPrimer.size();
1804         numLinkers = linker.size();
1805         numSpacers = spacer.size();
1806                 
1807                 bool allBlank = true;
1808                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1809                         if (barcodeNameVector[i] != "") {
1810                                 allBlank = false;
1811                                 break;
1812                         }
1813                 }
1814                 for (int i = 0; i < primerNameVector.size(); i++) {
1815                         if (primerNameVector[i] != "") {
1816                                 allBlank = false;
1817                                 break;
1818                         }
1819                 }
1820
1821                 if (allBlank) {
1822                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1823                         allFiles = false;
1824                         return false;
1825                 }
1826                 
1827                 return true;
1828                 
1829         }
1830         catch(exception& e) {
1831                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1832                 exit(1);
1833         }
1834 }
1835 //***************************************************************************************************************
1836
1837 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1838         try {
1839                 bool success = 1;
1840                 if(qscores.getName() != ""){
1841                         qscores.trimQScores(-1, keepFirst);
1842                 }
1843                 sequence.trim(keepFirst);
1844                 return success;
1845         }
1846         catch(exception& e) {
1847                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1848                 exit(1);
1849         }
1850         
1851 }       
1852
1853 //***************************************************************************************************************
1854
1855 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1856         try {
1857                 bool success = 0;
1858                 
1859                 int length = sequence.getNumBases() - removeLast;
1860                 
1861                 if(length > 0){
1862                         if(qscores.getName() != ""){
1863                                 qscores.trimQScores(-1, length);
1864                         }
1865                         sequence.trim(length);
1866                         success = 1;
1867                 }
1868                 else{
1869                         success = 0;
1870                 }
1871
1872                 return success;
1873         }
1874         catch(exception& e) {
1875                 m->errorOut(e, "removeLastTrim", "countDiffs");
1876                 exit(1);
1877         }
1878         
1879 }       
1880
1881 //***************************************************************************************************************
1882
1883 bool TrimSeqsCommand::cullLength(Sequence& seq){
1884         try {
1885         
1886                 int length = seq.getNumBases();
1887                 bool success = 0;       //guilty until proven innocent
1888                 
1889                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1890                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1891                 else                                                                                            {       success = 0;    }
1892                 
1893                 return success;
1894         
1895         }
1896         catch(exception& e) {
1897                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1898                 exit(1);
1899         }
1900         
1901 }
1902
1903 //***************************************************************************************************************
1904
1905 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1906         try {
1907                 int longHomoP = seq.getLongHomoPolymer();
1908                 bool success = 0;       //guilty until proven innocent
1909                 
1910                 if(longHomoP <= maxHomoP){      success = 1;    }
1911                 else                                    {       success = 0;    }
1912                 
1913                 return success;
1914         }
1915         catch(exception& e) {
1916                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1917                 exit(1);
1918         }
1919         
1920 }
1921 //********************************************************************/
1922 string TrimSeqsCommand::reverseOligo(string oligo){
1923         try {
1924         string reverse = "";
1925         
1926         for(int i=oligo.length()-1;i>=0;i--){
1927             
1928             if(oligo[i] == 'A')         {       reverse += 'T'; }
1929             else if(oligo[i] == 'T'){   reverse += 'A'; }
1930             else if(oligo[i] == 'U'){   reverse += 'A'; }
1931             
1932             else if(oligo[i] == 'G'){   reverse += 'C'; }
1933             else if(oligo[i] == 'C'){   reverse += 'G'; }
1934             
1935             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1936             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1937             
1938             else if(oligo[i] == 'M'){   reverse += 'K'; }
1939             else if(oligo[i] == 'K'){   reverse += 'M'; }
1940             
1941             else if(oligo[i] == 'W'){   reverse += 'W'; }
1942             else if(oligo[i] == 'S'){   reverse += 'S'; }
1943             
1944             else if(oligo[i] == 'B'){   reverse += 'V'; }
1945             else if(oligo[i] == 'V'){   reverse += 'B'; }
1946             
1947             else if(oligo[i] == 'D'){   reverse += 'H'; }
1948             else if(oligo[i] == 'H'){   reverse += 'D'; }
1949             
1950             else                                                {       reverse += 'N'; }
1951         }
1952         
1953         
1954         return reverse;
1955     }
1956         catch(exception& e) {
1957                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1958                 exit(1);
1959         }
1960 }
1961
1962 //***************************************************************************************************************
1963
1964 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1965         try {
1966                 int numNs = seq.getAmbigBases();
1967                 bool success = 0;       //guilty until proven innocent
1968                 
1969                 if(numNs <= maxAmbig)   {       success = 1;    }
1970                 else                                    {       success = 0;    }
1971                 
1972                 return success;
1973         }
1974         catch(exception& e) {
1975                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1976                 exit(1);
1977         }
1978         
1979 }
1980 //***************************************************************************************************************