]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added ignore option to oligos files for make.contigs, trim.seqs, trim.flows
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
25                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
26                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
27                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
28                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
29                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
30         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
31                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
32         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
34                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
35                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
36                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
37                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
38                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
39                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
40                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
41                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
42                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
43                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
44                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
45                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
46                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
47                         
48                 vector<string> myArray;
49                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string TrimSeqsCommand::getHelpString(){        
59         try {
60                 string helpString = "";
61                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
62                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
63                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
64                 helpString += "The fasta parameter is required.\n";
65                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
66                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
67                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
68         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
69                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
70                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
71                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
72                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
73                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
74                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
75                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
76         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
77                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
78                 helpString += "The qfile parameter allows you to provide a quality file.\n";
79                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
80                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
81                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
82                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
83                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
84                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
85                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
86                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
87                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
88                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
89                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
90                 helpString += "The trim.seqs command should be in the following format: \n";
91                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
92                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
93                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
95                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96                 return helpString;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 string TrimSeqsCommand::getOutputPattern(string type) {
105     try {
106         string pattern = "";
107         
108         if (type == "qfile") {  pattern = "[filename],[tag],qual"; } 
109         else if (type == "fasta") {  pattern = "[filename],[tag],fasta"; } 
110         else if (type == "group") {  pattern = "[filename],groups"; }
111         else if (type == "name") {  pattern = "[filename],[tag],names"; }
112         else if (type == "count") {  pattern = "[filename],[tag],count_table-[filename],count_table"; }
113         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
114         
115         return pattern;
116     }
117     catch(exception& e) {
118         m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
119         exit(1);
120     }
121 }
122 //**********************************************************************************************************************
123
124 TrimSeqsCommand::TrimSeqsCommand(){     
125         try {
126                 abort = true; calledHelp = true; 
127                 setParameters();
128                 vector<string> tempOutNames;
129                 outputTypes["fasta"] = tempOutNames;
130                 outputTypes["qfile"] = tempOutNames;
131                 outputTypes["group"] = tempOutNames;
132                 outputTypes["name"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141
142 TrimSeqsCommand::TrimSeqsCommand(string option)  {
143         try {
144                 
145                 abort = false; calledHelp = false;   
146                 comboStarts = 0;
147                 
148                 //allow user to run help
149                 if(option == "help") { help(); abort = true; calledHelp = true; }
150                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151                 
152                 else {
153                         vector<string> myArray = setParameters();
154                         
155                         OptionParser parser(option);
156                         map<string,string> parameters = parser.getParameters();
157                         
158                         ValidParameters validParameter;
159                         map<string,string>::iterator it;
160                         
161                         //check to make sure all parameters are valid for command
162                         for (it = parameters.begin(); it != parameters.end(); it++) { 
163                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
164                         }
165                         
166                         //initialize outputTypes
167                         vector<string> tempOutNames;
168                         outputTypes["fasta"] = tempOutNames;
169                         outputTypes["qfile"] = tempOutNames;
170                         outputTypes["group"] = tempOutNames;
171                         outputTypes["name"] = tempOutNames;
172             outputTypes["count"] = tempOutNames;
173                         
174                         //if the user changes the input directory command factory will send this info to us in the output parameter 
175                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
176                         if (inputDir == "not found"){   inputDir = "";          }
177                         else {
178                                 string path;
179                                 it = parameters.find("fasta");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
185                                 }
186                                 
187                                 it = parameters.find("oligos");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
193                                 }
194                                 
195                                 it = parameters.find("qfile");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
201                                 }
202                                 
203                                 it = parameters.find("name");
204                                 //user has given a template file
205                                 if(it != parameters.end()){ 
206                                         path = m->hasPath(it->second);
207                                         //if the user has not given a path then, add inputdir. else leave path alone.
208                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
209                                 }
210                 
211                 it = parameters.find("count");
212                                 //user has given a template file
213                                 if(it != parameters.end()){ 
214                                         path = m->hasPath(it->second);
215                                         //if the user has not given a path then, add inputdir. else leave path alone.
216                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
217                                 }
218                                 
219                         }
220
221                         
222                         //check for required parameters
223                         fastaFile = validParameter.validFile(parameters, "fasta", true);
224                         if (fastaFile == "not found") {                                 
225                                 fastaFile = m->getFastaFile(); 
226                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
227                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
228                         }else if (fastaFile == "not open") { abort = true; }    
229                         else { m->setFastaFile(fastaFile); }
230                         
231                         //if the user changes the output directory command factory will send this info to us in the output parameter 
232                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
233                                 outputDir = ""; 
234                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
235                         }
236                 
237                         
238                         //check for optional parameter and set defaults
239                         // ...at some point should added some additional type checking...
240                         string temp;
241                         temp = validParameter.validFile(parameters, "flip", false);
242                         if (temp == "not found")    {   flip = 0;       }
243                         else {  flip = m->isTrue(temp);         }
244                 
245                         temp = validParameter.validFile(parameters, "oligos", true);
246                         if (temp == "not found"){       oligoFile = "";         }
247                         else if(temp == "not open"){    abort = true;   } 
248                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
249                         
250                         
251                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
252                         m->mothurConvert(temp, maxAmbig);  
253
254                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
255                         m->mothurConvert(temp, maxHomoP);  
256
257                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
258                         m->mothurConvert(temp, minLength); 
259                         
260                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, maxLength);
262                         
263                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
264                         m->mothurConvert(temp, bdiffs);
265                         
266                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
267                         m->mothurConvert(temp, pdiffs);
268             
269             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
270                         m->mothurConvert(temp, ldiffs);
271             
272             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
273                         m->mothurConvert(temp, sdiffs);
274                         
275                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
276                         m->mothurConvert(temp, tdiffs);
277                         
278                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
279                         
280                         temp = validParameter.validFile(parameters, "qfile", true);     
281                         if (temp == "not found")        {       qFileName = "";         }
282                         else if(temp == "not open")     {       abort = true;           }
283                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
284                         
285                         temp = validParameter.validFile(parameters, "name", true);      
286                         if (temp == "not found")        {       nameFile = "";          }
287                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
288                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
289             
290             countfile = validParameter.validFile(parameters, "count", true);
291                         if (countfile == "not open") { abort = true; countfile = ""; }  
292                         else if (countfile == "not found") { countfile = ""; }
293                         else { m->setCountTableFile(countfile); }
294                         
295             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
296                         
297                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
298                         m->mothurConvert(temp, qThreshold);
299                         
300                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
301                         qtrim = m->isTrue(temp);
302
303                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
304                         convert(temp, qRollAverage);
305
306                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
307                         convert(temp, qWindowAverage);
308
309                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
310                         convert(temp, qWindowSize);
311
312                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
313                         convert(temp, qWindowStep);
314
315                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
316                         convert(temp, qAverage);
317
318                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
319                         convert(temp, keepFirst);
320
321                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
322                         convert(temp, removeLast);
323                         
324                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
325                         allFiles = m->isTrue(temp);
326             
327             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
328                         keepforward = m->isTrue(temp);
329                         
330                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
331                         m->setProcessors(temp);
332                         m->mothurConvert(temp, processors); 
333                         
334                         
335                         if(allFiles && (oligoFile == "")){
336                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
337                         }
338                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
339                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
340                                 qAverage=0;
341                                 qThreshold=0;
342                         }
343                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
344                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
345                                 abort = true;
346                         }
347                         
348             if (countfile == "") {
349                 if (nameFile == "") {
350                     vector<string> files; files.push_back(fastaFile);
351                     parser.getNameFile(files);
352                 }
353             }
354                 }
355
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
359                 exit(1);
360         }
361 }
362 //***************************************************************************************************************
363
364 int TrimSeqsCommand::execute(){
365         try{
366         
367                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
368                 
369                 numFPrimers = 0;  //this needs to be initialized
370                 numRPrimers = 0;
371         numSpacers = 0;
372         numLinkers = 0;
373                 createGroup = false;
374                 vector<vector<string> > fastaFileNames;
375                 vector<vector<string> > qualFileNames;
376                 vector<vector<string> > nameFileNames;
377                 
378         map<string, string> variables; 
379                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
380         variables["[tag]"] = "trim";
381                 string trimSeqFile = getOutputFileName("fasta",variables);
382         string trimQualFile = getOutputFileName("qfile",variables);
383                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
384         
385         variables["[tag]"] = "scrap";
386                 string scrapSeqFile = getOutputFileName("fasta",variables);
387         string scrapQualFile = getOutputFileName("qfile",variables);
388                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
389                 
390                 if (qFileName != "") {
391                         outputNames.push_back(trimQualFile);
392                         outputNames.push_back(scrapQualFile);
393                         outputTypes["qfile"].push_back(trimQualFile);
394                         outputTypes["qfile"].push_back(scrapQualFile); 
395                 }
396                 
397         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
398         variables["[tag]"] = "trim";
399                 string trimNameFile = getOutputFileName("name",variables);
400         variables["[tag]"] = "scrap";
401                 string scrapNameFile = getOutputFileName("name",variables);
402                 
403                 if (nameFile != "") {
404                         m->readNames(nameFile, nameMap);
405                         outputNames.push_back(trimNameFile);
406                         outputNames.push_back(scrapNameFile);
407                         outputTypes["name"].push_back(trimNameFile);
408                         outputTypes["name"].push_back(scrapNameFile); 
409                 }
410         
411         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
412         variables["[tag]"] = "trim";
413         string trimCountFile = getOutputFileName("count",variables);
414         variables["[tag]"] = "scrap";
415                 string scrapCountFile = getOutputFileName("count",variables);
416                 
417                 if (countfile != "") {
418             CountTable ct;
419             ct.readTable(countfile);
420             nameCount = ct.getNameMap();
421                         outputNames.push_back(trimCountFile);
422                         outputNames.push_back(scrapCountFile);
423                         outputTypes["count"].push_back(trimCountFile);
424                         outputTypes["count"].push_back(scrapCountFile); 
425                 }
426
427                 
428                 if (m->control_pressed) { return 0; }
429                 
430                 string outputGroupFileName;
431                 if(oligoFile != ""){
432                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
433                         if ((createGroup) && (countfile == "")){
434                 map<string, string> myvariables; 
435                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
436                                 outputGroupFileName = getOutputFileName("group",myvariables);
437                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
438                         }
439                 }
440        
441         //fills lines and qlines
442                 setLines(fastaFile, qFileName);
443                 
444         if(processors == 1){
445             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
446         }else{
447             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
448         }       
449                 
450                 
451                 if (m->control_pressed) {  return 0; }                  
452         
453                 if(allFiles){
454                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
455                         map<string, string>::iterator it;
456                         set<string> namesToRemove;
457                         for(int i=0;i<fastaFileNames.size();i++){
458                                 for(int j=0;j<fastaFileNames[0].size();j++){
459                                         if (fastaFileNames[i][j] != "") {
460                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
461                                                         if(m->isBlank(fastaFileNames[i][j])){
462                                                                 m->mothurRemove(fastaFileNames[i][j]);
463                                                                 namesToRemove.insert(fastaFileNames[i][j]);
464                                                         
465                                                                 if(qFileName != ""){
466                                                                         m->mothurRemove(qualFileNames[i][j]);
467                                                                         namesToRemove.insert(qualFileNames[i][j]);
468                                                                 }
469                                                                 
470                                                                 if(nameFile != ""){
471                                                                         m->mothurRemove(nameFileNames[i][j]);
472                                                                         namesToRemove.insert(nameFileNames[i][j]);
473                                                                 }
474                                                         }else{  
475                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
476                                                                 if (it == uniqueFastaNames.end()) {     
477                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
478                                                                 }       
479                                                         }
480                                                 }
481                                         }
482                                 }
483                         }
484                         
485                         //remove names for outputFileNames, just cleans up the output
486                         vector<string> outputNames2;
487                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
488                         outputNames = outputNames2;
489                         
490             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
491                 ifstream in;
492                 m->openInputFile(it->first, in);
493                 
494                 ofstream out;
495                 map<string, string> myvariables; 
496                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
497                 string thisGroupName = "";
498                 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
499                 else {  thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
500                 m->openOutputFile(thisGroupName, out);
501                 
502                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
503                 
504                 while (!in.eof()){
505                     if (m->control_pressed) { break; }
506                     
507                     Sequence currSeq(in); m->gobble(in);
508                     if (countfile == "") {  
509                         out << currSeq.getName() << '\t' << it->second << endl;  
510                         
511                         if (nameFile != "") {
512                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
513                             if (itName != nameMap.end()) { 
514                                 vector<string> thisSeqsNames; 
515                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
516                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
517                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
518                                 }
519                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
520                         }
521                     }else { 
522                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
523                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
524                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
525                     }
526                 }
527                 in.close();
528                 out.close();
529             }
530             
531             if (countfile != "") { //create countfile with group info included
532                 CountTable* ct = new CountTable();
533                 ct->readTable(trimCountFile);
534                 map<string, int> justTrimmedNames = ct->getNameMap();
535                 delete ct;
536                 
537                 CountTable newCt;
538                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
539                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
540                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
541                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
542                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
543                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
544                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
545                 }
546                 newCt.printTable(trimCountFile);
547             }
548                 }
549                 
550                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
551
552                 //output group counts
553                 m->mothurOutEndLine();
554                 int total = 0;
555                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
556                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
557                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
558                 }
559                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
560                 
561                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
562
563                 //set fasta file as new current fastafile
564                 string current = "";
565                 itTypes = outputTypes.find("fasta");
566                 if (itTypes != outputTypes.end()) {
567                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
568                 }
569                 
570                 itTypes = outputTypes.find("name");
571                 if (itTypes != outputTypes.end()) {
572                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
573                 }
574                 
575                 itTypes = outputTypes.find("qfile");
576                 if (itTypes != outputTypes.end()) {
577                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
578                 }
579                 
580                 itTypes = outputTypes.find("group");
581                 if (itTypes != outputTypes.end()) {
582                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
583                 }
584         
585         itTypes = outputTypes.find("count");
586                 if (itTypes != outputTypes.end()) {
587                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
588                 }
589
590                 m->mothurOutEndLine();
591                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
592                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
593                 m->mothurOutEndLine();
594                 
595                 return 0;       
596                         
597         }
598         catch(exception& e) {
599                 m->errorOut(e, "TrimSeqsCommand", "execute");
600                 exit(1);
601         }
602 }
603                 
604 /**************************************************************************************/
605 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
606                 
607         try {
608                 
609                 ofstream trimFASTAFile;
610                 m->openOutputFile(trimFileName, trimFASTAFile);
611                 
612                 ofstream scrapFASTAFile;
613                 m->openOutputFile(scrapFileName, scrapFASTAFile);
614                 
615                 ofstream trimQualFile;
616                 ofstream scrapQualFile;
617                 if(qFileName != ""){
618                         m->openOutputFile(trimQFileName, trimQualFile);
619                         m->openOutputFile(scrapQFileName, scrapQualFile);
620                 }
621                 
622                 ofstream trimNameFile;
623                 ofstream scrapNameFile;
624                 if(nameFile != ""){
625                         m->openOutputFile(trimNFileName, trimNameFile);
626                         m->openOutputFile(scrapNFileName, scrapNameFile);
627                 }
628                 
629         ofstream trimCountFile;
630                 ofstream scrapCountFile;
631                 if(countfile != ""){
632                         m->openOutputFile(trimCFileName, trimCountFile);
633                         m->openOutputFile(scrapCFileName, scrapCountFile);
634             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
635                 }
636                 
637                 ofstream outGroupsFile;
638                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
639                 if(allFiles){
640                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
641                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
642                                         if (fastaFileNames[i][j] != "") {
643                                                 ofstream temp;
644                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
645                                                 if(qFileName != ""){
646                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
647                                                 }
648                                                 
649                                                 if(nameFile != ""){
650                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
651                                                 }
652                                         }
653                                 }
654                         }
655                 }
656                 
657                 ifstream inFASTA;
658                 m->openInputFile(filename, inFASTA);
659                 inFASTA.seekg(line.start);
660                 
661                 ifstream qFile;
662                 if(qFileName != "")     {
663                         m->openInputFile(qFileName, qFile);
664                         qFile.seekg(qline.start);  
665                 }
666                 
667                 int count = 0;
668                 bool moreSeqs = 1;
669                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
670         
671                 while (moreSeqs) {
672                                 
673                         if (m->control_pressed) { 
674                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
675                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
676                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
677                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
678                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
679                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
680                         }
681                         
682                         int success = 1;
683                         string trashCode = "";
684                         int currentSeqsDiffs = 0;
685
686                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
687                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
688             
689                         QualityScores currQual;
690                         if(qFileName != ""){
691                                 currQual = QualityScores(qFile);  m->gobble(qFile);
692                 //cout << currQual.getName() << endl;
693                         }
694                         
695                         string origSeq = currSeq.getUnaligned();
696                         if (origSeq != "") {
697                                 
698                                 int barcodeIndex = 0;
699                                 int primerIndex = 0;
700                                 
701                 if(numLinkers != 0){
702                                         success = trimOligos.stripLinker(currSeq, currQual);
703                                         if(success > ldiffs)            {       trashCode += 'k';       }
704                                         else{ currentSeqsDiffs += success;  }
705
706                                 }
707                 
708                                 if(barcodes.size() != 0){
709                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
710                                         if(success > bdiffs)            {       trashCode += 'b';       }
711                                         else{ currentSeqsDiffs += success;  }
712                                 }
713                                 
714                 if(numSpacers != 0){
715                                         success = trimOligos.stripSpacer(currSeq, currQual);
716                                         if(success > sdiffs)            {       trashCode += 's';       }
717                                         else{ currentSeqsDiffs += success;  }
718
719                                 }
720                 
721                                 if(numFPrimers != 0){
722                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
723                                         if(success > pdiffs)            {       trashCode += 'f';       }
724                                         else{ currentSeqsDiffs += success;  }
725                                 }
726                                 
727                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
728                                 
729                                 if(numRPrimers != 0){
730                                         success = trimOligos.stripReverse(currSeq, currQual);
731                                         if(!success)                            {       trashCode += 'r';       }
732                                 }
733
734                                 if(keepFirst != 0){
735                                         success = keepFirstTrim(currSeq, currQual);
736                                 }
737                                 
738                                 if(removeLast != 0){
739                                         success = removeLastTrim(currSeq, currQual);
740                                         if(!success)                            {       trashCode += 'l';       }
741                                 }
742
743                                 
744                                 if(qFileName != ""){
745                                         int origLength = currSeq.getNumBases();
746                                         
747                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
748                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
749                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
750                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
751                                         else                                            {       success = 1;                            }
752                                         
753                                         //you don't want to trim, if it fails above then scrap it
754                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
755                                         
756                                         if(!success)                            {       trashCode += 'q';       }
757                                 }                               
758                 
759                                 if(minLength > 0 || maxLength > 0){
760                                         success = cullLength(currSeq);
761                                         if(!success)                            {       trashCode += 'l';       }
762                                 }
763                                 if(maxHomoP > 0){
764                                         success = cullHomoP(currSeq);
765                                         if(!success)                            {       trashCode += 'h';       }
766                                 }
767                                 if(maxAmbig != -1){
768                                         success = cullAmbigs(currSeq);
769                                         if(!success)                            {       trashCode += 'n';       }
770                                 }
771                                 
772                                 if(flip){               // should go last                       
773                                         currSeq.reverseComplement();
774                                         if(qFileName != ""){
775                                                 currQual.flipQScores(); 
776                                         }
777                                 }
778                                 
779                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
780                 
781                                 if(trashCode.length() == 0){
782                     string thisGroup = "";
783                     if (createGroup) {
784                                                 if(barcodes.size() != 0){
785                                                         thisGroup = barcodeNameVector[barcodeIndex];
786                                                         if (primers.size() != 0) { 
787                                                                 if (primerNameVector[primerIndex] != "") { 
788                                                                         if(thisGroup != "") {
789                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
790                                                                         }else {
791                                                                                 thisGroup = primerNameVector[primerIndex]; 
792                                                                         }
793                                                                 } 
794                                                         }
795                         }
796                     }
797                     
798                     int pos = thisGroup.find("ignore");
799                     if (pos == string::npos) {
800                         currSeq.setAligned(currSeq.getUnaligned());
801                         currSeq.printSequence(trimFASTAFile);
802                         
803                         if(qFileName != ""){
804                             currQual.printQScores(trimQualFile);
805                         }
806                         
807                         
808                         if(nameFile != ""){
809                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
810                             if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
811                             else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
812                         }
813                         
814                         int numRedundants = 0;
815                         if (countfile != "") {
816                             map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
817                             if (itCount != nameCount.end()) { 
818                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
819                                 numRedundants = itCount->second-1;
820                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
821                         }
822                         
823                         if (createGroup) {
824                             if(barcodes.size() != 0){
825                                                                 
826                                 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
827                                 
828                                 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
829                                 else {   groupMap[currSeq.getName()] = thisGroup; }
830                                 
831                                 if (nameFile != "") {
832                                     map<string, string>::iterator itName = nameMap.find(currSeq.getName());
833                                     if (itName != nameMap.end()) { 
834                                         vector<string> thisSeqsNames; 
835                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
836                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
837                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
838                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
839                                         }
840                                     }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
841                                 }
842                                 
843                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
844                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
845                                 else { groupCounts[it->first] += (1 + numRedundants); }
846                                                                 
847                             }
848                         }
849                         
850                         if(allFiles){
851                             ofstream output;
852                             m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
853                             currSeq.printSequence(output);
854                             output.close();
855                             
856                             if(qFileName != ""){
857                                 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
858                                 currQual.printQScores(output);
859                                 output.close();                                                 
860                             }
861                             
862                             if(nameFile != ""){
863                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
864                                 if (itName != nameMap.end()) { 
865                                     m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
866                                     output << itName->first << '\t' << itName->second << endl; 
867                                     output.close();
868                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
869                             }
870                         }
871                     }
872                                 }
873                                 else{
874                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
875                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
876                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
877                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
878                                         }
879                     if (countfile != "") {
880                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
881                         if (itCount != nameCount.end()) { 
882                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
883                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
884                     }
885                     
886                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
887                                         currSeq.setUnaligned(origSeq);
888                                         currSeq.setAligned(origSeq);
889                                         currSeq.printSequence(scrapFASTAFile);
890                                         if(qFileName != ""){
891                                                 currQual.printQScores(scrapQualFile);
892                                         }
893                                 }
894                                 count++;
895                         }
896                         
897                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
898                                 unsigned long long pos = inFASTA.tellg();
899                                 if ((pos == -1) || (pos >= line.end)) { break; }
900                         
901                         #else
902                                 if (inFASTA.eof()) { break; }
903                         #endif
904                         
905                         //report progress
906                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
907                         
908                 }
909                 //report progress
910                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
911                 
912                 
913                 inFASTA.close();
914                 trimFASTAFile.close();
915                 scrapFASTAFile.close();
916                 if (createGroup) {       outGroupsFile.close();   }
917                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
918                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
919         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
920                 
921                 return count;
922         }
923         catch(exception& e) {
924                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
925                 exit(1);
926         }
927 }
928
929 /**************************************************************************************************/
930
931 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
932         try {
933         
934         int process = 1;
935                 int exitCommand = 1;
936                 processIDS.clear();
937                 
938 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
939                                 //loop through and create all the processes you want
940                 while (process != processors) {
941                         int pid = fork();
942                         
943                         if (pid > 0) {
944                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
945                                 process++;
946                         }else if (pid == 0){
947                                 
948                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
949                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
950                                 vector<vector<string> > tempNameFileNames = nameFileNames;
951
952                                 if(allFiles){
953                                         ofstream temp;
954
955                                         for(int i=0;i<tempFASTAFileNames.size();i++){
956                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
957                                                         if (tempFASTAFileNames[i][j] != "") {
958                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
959                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
960
961                                                                 if(qFileName != ""){
962                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
963                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
964                                                                 }
965                                                                 if(nameFile != ""){
966                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
967                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
968                                                                 }
969                                                         }
970                                                 }
971                                         }
972                                 }
973                                                         
974                                 driverCreateTrim(filename,
975                                                                  qFileName,
976                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
977                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
978                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
979                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
980                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
981                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
982                                  (trimCountFileName + toString(getpid()) + ".temp"),
983                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
984                                                                  (groupFile + toString(getpid()) + ".temp"),
985                                                                  tempFASTAFileNames,
986                                                                  tempPrimerQualFileNames,
987                                                                  tempNameFileNames,
988                                                                  lines[process],
989                                                                  qLines[process]);
990                 
991                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
992                                 
993                                 //pass groupCounts to parent
994                                 if(createGroup){
995                                         ofstream out;
996                                         string tempFile = filename + toString(getpid()) + ".num.temp";
997                                         m->openOutputFile(tempFile, out);
998                                         
999                                         out << groupCounts.size() << endl;
1000                                         
1001                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1002                                                 out << it->first << '\t' << it->second << endl;
1003                                         }
1004                     
1005                     out << groupMap.size() << endl;
1006                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1007                                                 out << it->first << '\t' << it->second << endl;
1008                                         }
1009                                         out.close();
1010                                 }
1011                                 exit(0);
1012                         }else { 
1013                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1014                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1015                                 exit(0);
1016                         }
1017                 }
1018                 
1019                 //parent do my part
1020                 ofstream temp;
1021                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1022                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1023                 if(qFileName != ""){
1024                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1025                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1026                 }
1027                 if (nameFile != "") {
1028                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1029                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1030                 }
1031         if (countfile != "") {
1032                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1033                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1034                 }
1035
1036                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1037                 
1038                 //force parent to wait until all the processes are done
1039                 for (int i=0;i<processIDS.size();i++) { 
1040                         int temp = processIDS[i];
1041                         wait(&temp);
1042                 }
1043 #else
1044         //////////////////////////////////////////////////////////////////////////////////////////////////////
1045                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1046                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1047                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1048                 
1049                 vector<trimData*> pDataArray; 
1050                 DWORD   dwThreadIdArray[processors-1];
1051                 HANDLE  hThreadArray[processors-1]; 
1052                 
1053                 //Create processor worker threads.
1054                 for( int h=0; h<processors-1; h++){
1055                         
1056             string extension = "";
1057                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1058             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1059             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1060             vector<vector<string> > tempNameFileNames = nameFileNames;
1061             
1062             if(allFiles){
1063                 ofstream temp;
1064                 
1065                 for(int i=0;i<tempFASTAFileNames.size();i++){
1066                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1067                         if (tempFASTAFileNames[i][j] != "") {
1068                             tempFASTAFileNames[i][j] += extension;
1069                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1070                             
1071                             if(qFileName != ""){
1072                                 tempPrimerQualFileNames[i][j] += extension;
1073                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1074                             }
1075                             if(nameFile != ""){
1076                                 tempNameFileNames[i][j] += extension;
1077                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1078                             }
1079                         }
1080                     }
1081                 }
1082             }
1083
1084             
1085                         trimData* tempTrim = new trimData(filename,
1086                                               qFileName, nameFile, countfile,
1087                                               (trimFASTAFileName+extension),
1088                                               (scrapFASTAFileName+extension),
1089                                               (trimQualFileName+extension),
1090                                               (scrapQualFileName+extension),
1091                                               (trimNameFileName+extension),
1092                                               (scrapNameFileName+extension),
1093                                               (trimCountFileName+extension),
1094                                               (scrapCountFileName+extension),
1095                                               (groupFile+extension),
1096                                               tempFASTAFileNames,
1097                                               tempPrimerQualFileNames,
1098                                               tempNameFileNames,
1099                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1100                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
1101                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1102                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1103                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
1104                         pDataArray.push_back(tempTrim);
1105             
1106                         hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
1107                 }
1108         
1109         //parent do my part
1110                 ofstream temp;
1111                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1112                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1113                 if(qFileName != ""){
1114                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1115                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1116                 }
1117                 if (nameFile != "") {
1118                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1119                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1120                 }
1121         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1122         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1123         vector<vector<string> > tempNameFileNames = nameFileNames;
1124         if(allFiles){
1125             ofstream temp;
1126             string extension = toString(processors-1) + ".temp";
1127             for(int i=0;i<tempFASTAFileNames.size();i++){
1128                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1129                     if (tempFASTAFileNames[i][j] != "") {
1130                         tempFASTAFileNames[i][j] += extension;
1131                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1132                         
1133                         if(qFileName != ""){
1134                             tempPrimerQualFileNames[i][j] += extension;
1135                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
1136                         }
1137                         if(nameFile != ""){
1138                             tempNameFileNames[i][j] += extension;
1139                             m->openOutputFile(tempNameFileNames[i][j], temp);           temp.close();
1140                         }
1141                     }
1142                 }
1143             }
1144         }
1145         
1146                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1147         processIDS.push_back(processors-1);
1148
1149         
1150                 //Wait until all threads have terminated.
1151                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1152                 
1153                 //Close all thread handles and free memory allocations.
1154                 for(int i=0; i < pDataArray.size(); i++){
1155                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1156                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1157                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1158                 else { groupCounts[it->first] += it->second; }
1159             }
1160             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1161                 map<string, string>::iterator it2 = groupMap.find(it->first);
1162                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1163                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1164             }
1165             CloseHandle(hThreadArray[i]);
1166                         delete pDataArray[i];
1167                 }
1168         
1169 #endif          
1170         
1171         
1172         //append files
1173                 for(int i=0;i<processIDS.size();i++){
1174                         
1175                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1176                         
1177                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1178                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1179                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1180                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1181                         
1182                         if(qFileName != ""){
1183                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1184                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1185                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1186                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1187                         }
1188                         
1189                         if(nameFile != ""){
1190                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1191                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1192                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1193                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1194                         }
1195             
1196             if(countfile != ""){
1197                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1198                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1199                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1200                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1201                         }
1202                         
1203                         if((createGroup)&&(countfile == "")){
1204                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1205                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1206                         }
1207                         
1208                         
1209                         if(allFiles){
1210                                 for(int j=0;j<fastaFileNames.size();j++){
1211                                         for(int k=0;k<fastaFileNames[j].size();k++){
1212                                                 if (fastaFileNames[j][k] != "") {
1213                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1214                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1215                                                         
1216                                                         if(qFileName != ""){
1217                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1218                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1219                                                         }
1220                                                         
1221                                                         if(nameFile != ""){
1222                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1223                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1224                                                         }
1225                                                 }
1226                                         }
1227                                 }
1228                         }
1229                         
1230             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1231                         if(createGroup){
1232                                 ifstream in;
1233                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1234                                 m->openInputFile(tempFile, in);
1235                                 int tempNum;
1236                                 string group;
1237                                 
1238                                 in >> tempNum; m->gobble(in);
1239                                 
1240                                 if (tempNum != 0) {
1241                                         for (int i = 0; i < tempNum; i++) { 
1242                         int groupNum;
1243                                                 in >> group >> groupNum; m->gobble(in);
1244                         
1245                                                 map<string, int>::iterator it = groupCounts.find(group);
1246                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1247                                                 else { groupCounts[it->first] += groupNum; }
1248                                         }
1249                                 }
1250                 in >> tempNum; m->gobble(in);
1251                 if (tempNum != 0) {
1252                                         for (int i = 0; i < tempNum; i++) { 
1253                         string group, seqName;
1254                                                 in >> seqName >> group; m->gobble(in);
1255                         
1256                                                 map<string, string>::iterator it = groupMap.find(seqName);
1257                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1258                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1259                                         }
1260                                 }
1261                 
1262                                 in.close(); m->mothurRemove(tempFile);
1263                         }
1264             #endif
1265                 }
1266
1267         return exitCommand;
1268         }
1269         catch(exception& e) {
1270                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1271                 exit(1);
1272         }
1273 }
1274
1275 /**************************************************************************************************/
1276
1277 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1278         try {
1279         
1280         vector<unsigned long long> fastaFilePos;
1281                 vector<unsigned long long> qfileFilePos;
1282                 
1283                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1284                 //set file positions for fasta file
1285                 fastaFilePos = m->divideFile(filename, processors);
1286                 
1287                 //get name of first sequence in each chunk
1288                 map<string, int> firstSeqNames;
1289                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1290                         ifstream in;
1291                         m->openInputFile(filename, in);
1292                         in.seekg(fastaFilePos[i]);
1293                 
1294                         Sequence temp(in); 
1295                         firstSeqNames[temp.getName()] = i;
1296                 
1297                         in.close();
1298                 }
1299                 
1300                 if(qfilename != "")     {
1301             //seach for filePos of each first name in the qfile and save in qfileFilePos
1302             ifstream inQual;
1303             m->openInputFile(qfilename, inQual);
1304             
1305             string input;
1306             while(!inQual.eof()){       
1307                 input = m->getline(inQual);
1308                 
1309                 if (input.length() != 0) {
1310                     if(input[0] == '>'){ //this is a sequence name line
1311                         istringstream nameStream(input);
1312                         
1313                         string sname = "";  nameStream >> sname;
1314                         sname = sname.substr(1);
1315                         
1316                         map<string, int>::iterator it = firstSeqNames.find(sname);
1317                         
1318                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1319                             unsigned long long pos = inQual.tellg(); 
1320                             qfileFilePos.push_back(pos - input.length() - 1);   
1321                             firstSeqNames.erase(it);
1322                         }
1323                     }
1324                 }
1325                 
1326                 if (firstSeqNames.size() == 0) { break; }
1327             }
1328             inQual.close();
1329             
1330             
1331             if (firstSeqNames.size() != 0) { 
1332                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1333                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1334                 }
1335                 qFileName = "";
1336                 return processors;
1337             }
1338             
1339             //get last file position of qfile
1340             FILE * pFile;
1341             unsigned long long size;
1342             
1343             //get num bytes in file
1344             pFile = fopen (qfilename.c_str(),"rb");
1345             if (pFile==NULL) perror ("Error opening file");
1346             else{
1347                 fseek (pFile, 0, SEEK_END);
1348                 size=ftell (pFile);
1349                 fclose (pFile);
1350             }
1351             
1352             qfileFilePos.push_back(size);
1353         }
1354         
1355         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1356             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1357                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1358                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1359                 }       
1360                 if(qfilename == "")     {       qLines = lines; } //files with duds
1361                 
1362                 return processors;
1363                 
1364                 #else
1365             
1366         if (processors == 1) { //save time
1367                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1368                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1369             lines.push_back(linePair(0, 1000));
1370             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1371         }else{
1372             int numFastaSeqs = 0;
1373             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1374             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1375         
1376             if (qfilename != "") { 
1377                 int numQualSeqs = 0;
1378                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1379                 
1380                 if (numFastaSeqs != numQualSeqs) {
1381                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1382                 }
1383             }
1384         
1385             //figure out how many sequences you have to process
1386             int numSeqsPerProcessor = numFastaSeqs / processors;
1387             for (int i = 0; i < processors; i++) {
1388                 int startIndex =  i * numSeqsPerProcessor;
1389                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1390                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1391                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1392             }
1393         }
1394             if(qfilename == "") {       qLines = lines; } //files with duds
1395                         return 1;
1396                 
1397                 #endif
1398         }
1399         catch(exception& e) {
1400                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1401                 exit(1);
1402         }
1403 }
1404
1405 //***************************************************************************************************************
1406
1407 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1408         try {
1409                 ifstream inOligos;
1410                 m->openInputFile(oligoFile, inOligos);
1411                 
1412                 ofstream test;
1413                 
1414                 string type, oligo, group;
1415
1416                 int indexPrimer = 0;
1417                 int indexBarcode = 0;
1418                 
1419                 while(!inOligos.eof()){
1420
1421                         inOligos >> type; 
1422             
1423                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1424             
1425                         if(type[0] == '#'){
1426                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1427                                 m->gobble(inOligos);
1428                         }
1429                         else{
1430                                 m->gobble(inOligos);
1431                                 //make type case insensitive
1432                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1433                                 
1434                                 inOligos >> oligo;
1435                 
1436                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1437                                 
1438                                 for(int i=0;i<oligo.length();i++){
1439                                         oligo[i] = toupper(oligo[i]);
1440                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1441                                 }
1442                                 
1443                                 if(type == "FORWARD"){
1444                                         group = "";
1445                                         
1446                                         // get rest of line in case there is a primer name
1447                                         while (!inOligos.eof()) {       
1448                                                 char c = inOligos.get(); 
1449                                                 if (c == 10 || c == 13){        break;  }
1450                                                 else if (c == 32 || c == 9){;} //space or tab
1451                                                 else {  group += c;  }
1452                                         } 
1453                                         
1454                                         //check for repeat barcodes
1455                                         map<string, int>::iterator itPrime = primers.find(oligo);
1456                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1457                                         
1458                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1459                     
1460                                         primers[oligo]=indexPrimer; indexPrimer++;              
1461                                         primerNameVector.push_back(group);
1462                                 }
1463                                 else if(type == "REVERSE"){
1464                                         //Sequence oligoRC("reverse", oligo);
1465                                         //oligoRC.reverseComplement();
1466                     string oligoRC = reverseOligo(oligo);
1467                                         revPrimer.push_back(oligoRC);
1468                                 }
1469                                 else if(type == "BARCODE"){
1470                                         inOligos >> group;
1471                                         
1472                                         //check for repeat barcodes
1473                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1474                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1475                     
1476                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1477                                         barcodeNameVector.push_back(group);
1478                                 }else if(type == "LINKER"){
1479                                         linker.push_back(oligo);
1480                                 }else if(type == "SPACER"){
1481                                         spacer.push_back(oligo);
1482                                 }
1483                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1484                         }
1485                         m->gobble(inOligos);
1486                 }       
1487                 inOligos.close();
1488                 
1489                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1490                 
1491                 //add in potential combos
1492                 if(barcodeNameVector.size() == 0){
1493                         barcodes[""] = 0;
1494                         barcodeNameVector.push_back("");                        
1495                 }
1496                 
1497                 if(primerNameVector.size() == 0){
1498                         primers[""] = 0;
1499                         primerNameVector.push_back("");                 
1500                 }
1501                 
1502                 fastaFileNames.resize(barcodeNameVector.size());
1503                 for(int i=0;i<fastaFileNames.size();i++){
1504                         fastaFileNames[i].assign(primerNameVector.size(), "");
1505                 }
1506                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1507                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1508                 
1509                 if(allFiles){
1510                         set<string> uniqueNames; //used to cleanup outputFileNames
1511                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1512                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1513                                         
1514                                         string primerName = primerNameVector[itPrimer->second];
1515                                         string barcodeName = barcodeNameVector[itBar->second];
1516                                         
1517                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
1518                                         else {
1519                         string comboGroupName = "";
1520                         string fastaFileName = "";
1521                         string qualFileName = "";
1522                         string nameFileName = "";
1523                         string countFileName = "";
1524                         
1525                         if(primerName == ""){
1526                             comboGroupName = barcodeNameVector[itBar->second];
1527                         }
1528                         else{
1529                             if(barcodeName == ""){
1530                                 comboGroupName = primerNameVector[itPrimer->second];
1531                             }
1532                             else{
1533                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1534                             }
1535                         }
1536                         
1537                         
1538                         ofstream temp;
1539                         map<string, string> variables; 
1540                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1541                         variables["[tag]"] = comboGroupName;
1542                         fastaFileName = getOutputFileName("fasta", variables);
1543                         if (uniqueNames.count(fastaFileName) == 0) {
1544                             outputNames.push_back(fastaFileName);
1545                             outputTypes["fasta"].push_back(fastaFileName);
1546                             uniqueNames.insert(fastaFileName);
1547                         }
1548                         
1549                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1550                         m->openOutputFile(fastaFileName, temp);         temp.close();
1551                         
1552                         if(qFileName != ""){
1553                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1554                             qualFileName = getOutputFileName("qfile", variables);
1555                             if (uniqueNames.count(qualFileName) == 0) {
1556                                 outputNames.push_back(qualFileName);
1557                                 outputTypes["qfile"].push_back(qualFileName);
1558                             }
1559                             
1560                             qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1561                             m->openOutputFile(qualFileName, temp);              temp.close();
1562                         }
1563                         
1564                         if(nameFile != ""){
1565                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1566                             nameFileName = getOutputFileName("name", variables);
1567                             if (uniqueNames.count(nameFileName) == 0) {
1568                                 outputNames.push_back(nameFileName);
1569                                 outputTypes["name"].push_back(nameFileName);
1570                             }
1571                             
1572                             nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1573                             m->openOutputFile(nameFileName, temp);              temp.close();
1574                         }
1575                     }
1576                                 }
1577                         }
1578                 }
1579                 numFPrimers = primers.size();
1580                 numRPrimers = revPrimer.size();
1581         numLinkers = linker.size();
1582         numSpacers = spacer.size();
1583                 
1584                 bool allBlank = true;
1585                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1586                         if (barcodeNameVector[i] != "") {
1587                                 allBlank = false;
1588                                 break;
1589                         }
1590                 }
1591                 for (int i = 0; i < primerNameVector.size(); i++) {
1592                         if (primerNameVector[i] != "") {
1593                                 allBlank = false;
1594                                 break;
1595                         }
1596                 }
1597
1598                 if (allBlank) {
1599                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1600                         allFiles = false;
1601                         return false;
1602                 }
1603                 
1604                 return true;
1605                 
1606         }
1607         catch(exception& e) {
1608                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1609                 exit(1);
1610         }
1611 }
1612 //***************************************************************************************************************
1613
1614 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1615         try {
1616                 bool success = 1;
1617                 if(qscores.getName() != ""){
1618                         qscores.trimQScores(-1, keepFirst);
1619                 }
1620                 sequence.trim(keepFirst);
1621                 return success;
1622         }
1623         catch(exception& e) {
1624                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1625                 exit(1);
1626         }
1627         
1628 }       
1629
1630 //***************************************************************************************************************
1631
1632 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1633         try {
1634                 bool success = 0;
1635                 
1636                 int length = sequence.getNumBases() - removeLast;
1637                 
1638                 if(length > 0){
1639                         if(qscores.getName() != ""){
1640                                 qscores.trimQScores(-1, length);
1641                         }
1642                         sequence.trim(length);
1643                         success = 1;
1644                 }
1645                 else{
1646                         success = 0;
1647                 }
1648
1649                 return success;
1650         }
1651         catch(exception& e) {
1652                 m->errorOut(e, "removeLastTrim", "countDiffs");
1653                 exit(1);
1654         }
1655         
1656 }       
1657
1658 //***************************************************************************************************************
1659
1660 bool TrimSeqsCommand::cullLength(Sequence& seq){
1661         try {
1662         
1663                 int length = seq.getNumBases();
1664                 bool success = 0;       //guilty until proven innocent
1665                 
1666                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1667                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1668                 else                                                                                            {       success = 0;    }
1669                 
1670                 return success;
1671         
1672         }
1673         catch(exception& e) {
1674                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1675                 exit(1);
1676         }
1677         
1678 }
1679
1680 //***************************************************************************************************************
1681
1682 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1683         try {
1684                 int longHomoP = seq.getLongHomoPolymer();
1685                 bool success = 0;       //guilty until proven innocent
1686                 
1687                 if(longHomoP <= maxHomoP){      success = 1;    }
1688                 else                                    {       success = 0;    }
1689                 
1690                 return success;
1691         }
1692         catch(exception& e) {
1693                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1694                 exit(1);
1695         }
1696         
1697 }
1698 //********************************************************************/
1699 string TrimSeqsCommand::reverseOligo(string oligo){
1700         try {
1701         string reverse = "";
1702         
1703         for(int i=oligo.length()-1;i>=0;i--){
1704             
1705             if(oligo[i] == 'A')         {       reverse += 'T'; }
1706             else if(oligo[i] == 'T'){   reverse += 'A'; }
1707             else if(oligo[i] == 'U'){   reverse += 'A'; }
1708             
1709             else if(oligo[i] == 'G'){   reverse += 'C'; }
1710             else if(oligo[i] == 'C'){   reverse += 'G'; }
1711             
1712             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1713             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1714             
1715             else if(oligo[i] == 'M'){   reverse += 'K'; }
1716             else if(oligo[i] == 'K'){   reverse += 'M'; }
1717             
1718             else if(oligo[i] == 'W'){   reverse += 'W'; }
1719             else if(oligo[i] == 'S'){   reverse += 'S'; }
1720             
1721             else if(oligo[i] == 'B'){   reverse += 'V'; }
1722             else if(oligo[i] == 'V'){   reverse += 'B'; }
1723             
1724             else if(oligo[i] == 'D'){   reverse += 'H'; }
1725             else if(oligo[i] == 'H'){   reverse += 'D'; }
1726             
1727             else                                                {       reverse += 'N'; }
1728         }
1729         
1730         
1731         return reverse;
1732     }
1733         catch(exception& e) {
1734                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1735                 exit(1);
1736         }
1737 }
1738
1739 //***************************************************************************************************************
1740
1741 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1742         try {
1743                 int numNs = seq.getAmbigBases();
1744                 bool success = 0;       //guilty until proven innocent
1745                 
1746                 if(numNs <= maxAmbig)   {       success = 1;    }
1747                 else                                    {       success = 0;    }
1748                 
1749                 return success;
1750         }
1751         catch(exception& e) {
1752                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1753                 exit(1);
1754         }
1755         
1756 }
1757 //***************************************************************************************************************