]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
24                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
25                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
26                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
27                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
28                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
29                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
30         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
31                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
32         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
34                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
35                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
36                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
37                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
38                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
39                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
40                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
41                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
42                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
43                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
44                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
45                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
46                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
47                         
48                 vector<string> myArray;
49                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string TrimSeqsCommand::getHelpString(){        
59         try {
60                 string helpString = "";
61                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
62                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
63                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
64                 helpString += "The fasta parameter is required.\n";
65                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
66                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
67                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
68         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
69                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
70                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
71                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
72                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
73                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
74                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
75                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
76         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
77                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
78                 helpString += "The qfile parameter allows you to provide a quality file.\n";
79                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
80                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
81                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
82                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
83                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
84                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
85                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
86                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
87                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
88                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
89                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
90                 helpString += "The trim.seqs command should be in the following format: \n";
91                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
92                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
93                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
95                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96                 return helpString;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
105         try {
106         string outputFileName = "";
107                 map<string, vector<string> >::iterator it;
108         
109         //is this a type this command creates
110         it = outputTypes.find(type);
111         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
112         else {
113             if (type == "qfile")            {   outputFileName =  "qual";   }
114             else if (type == "fasta")            {   outputFileName =  "fasta";   }
115             else if (type == "group")            {   outputFileName =  "groups";   }
116             else if (type == "name")            {   outputFileName =  "names";   }
117             else if (type == "count")            {   outputFileName =  "count_table";   }
118             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
119         }
120         return outputFileName;
121         }
122         catch(exception& e) {
123                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
124                 exit(1);
125         }
126 }
127
128
129 //**********************************************************************************************************************
130
131 TrimSeqsCommand::TrimSeqsCommand(){     
132         try {
133                 abort = true; calledHelp = true; 
134                 setParameters();
135                 vector<string> tempOutNames;
136                 outputTypes["fasta"] = tempOutNames;
137                 outputTypes["qfile"] = tempOutNames;
138                 outputTypes["group"] = tempOutNames;
139                 outputTypes["name"] = tempOutNames;
140         outputTypes["count"] = tempOutNames;
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
144                 exit(1);
145         }
146 }
147 //***************************************************************************************************************
148
149 TrimSeqsCommand::TrimSeqsCommand(string option)  {
150         try {
151                 
152                 abort = false; calledHelp = false;   
153                 comboStarts = 0;
154                 
155                 //allow user to run help
156                 if(option == "help") { help(); abort = true; calledHelp = true; }
157                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
158                 
159                 else {
160                         vector<string> myArray = setParameters();
161                         
162                         OptionParser parser(option);
163                         map<string,string> parameters = parser.getParameters();
164                         
165                         ValidParameters validParameter;
166                         map<string,string>::iterator it;
167                         
168                         //check to make sure all parameters are valid for command
169                         for (it = parameters.begin(); it != parameters.end(); it++) { 
170                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
171                         }
172                         
173                         //initialize outputTypes
174                         vector<string> tempOutNames;
175                         outputTypes["fasta"] = tempOutNames;
176                         outputTypes["qfile"] = tempOutNames;
177                         outputTypes["group"] = tempOutNames;
178                         outputTypes["name"] = tempOutNames;
179             outputTypes["count"] = tempOutNames;
180                         
181                         //if the user changes the input directory command factory will send this info to us in the output parameter 
182                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
183                         if (inputDir == "not found"){   inputDir = "";          }
184                         else {
185                                 string path;
186                                 it = parameters.find("fasta");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
192                                 }
193                                 
194                                 it = parameters.find("oligos");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
200                                 }
201                                 
202                                 it = parameters.find("qfile");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
208                                 }
209                                 
210                                 it = parameters.find("name");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
216                                 }
217                 
218                 it = parameters.find("count");
219                                 //user has given a template file
220                                 if(it != parameters.end()){ 
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
224                                 }
225                                 
226                         }
227
228                         
229                         //check for required parameters
230                         fastaFile = validParameter.validFile(parameters, "fasta", true);
231                         if (fastaFile == "not found") {                                 
232                                 fastaFile = m->getFastaFile(); 
233                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
234                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
235                         }else if (fastaFile == "not open") { abort = true; }    
236                         else { m->setFastaFile(fastaFile); }
237                         
238                         //if the user changes the output directory command factory will send this info to us in the output parameter 
239                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
240                                 outputDir = ""; 
241                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
242                         }
243                 
244                         
245                         //check for optional parameter and set defaults
246                         // ...at some point should added some additional type checking...
247                         string temp;
248                         temp = validParameter.validFile(parameters, "flip", false);
249                         if (temp == "not found")    {   flip = 0;       }
250                         else {  flip = m->isTrue(temp);         }
251                 
252                         temp = validParameter.validFile(parameters, "oligos", true);
253                         if (temp == "not found"){       oligoFile = "";         }
254                         else if(temp == "not open"){    abort = true;   } 
255                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
256                         
257                         
258                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
259                         m->mothurConvert(temp, maxAmbig);  
260
261                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
262                         m->mothurConvert(temp, maxHomoP);  
263
264                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
265                         m->mothurConvert(temp, minLength); 
266                         
267                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
268                         m->mothurConvert(temp, maxLength);
269                         
270                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
271                         m->mothurConvert(temp, bdiffs);
272                         
273                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
274                         m->mothurConvert(temp, pdiffs);
275             
276             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
277                         m->mothurConvert(temp, ldiffs);
278             
279             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
280                         m->mothurConvert(temp, sdiffs);
281                         
282                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
283                         m->mothurConvert(temp, tdiffs);
284                         
285                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
286                         
287                         temp = validParameter.validFile(parameters, "qfile", true);     
288                         if (temp == "not found")        {       qFileName = "";         }
289                         else if(temp == "not open")     {       abort = true;           }
290                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
291                         
292                         temp = validParameter.validFile(parameters, "name", true);      
293                         if (temp == "not found")        {       nameFile = "";          }
294                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
295                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
296             
297             countfile = validParameter.validFile(parameters, "count", true);
298                         if (countfile == "not open") { abort = true; countfile = ""; }  
299                         else if (countfile == "not found") { countfile = ""; }
300                         else { m->setCountTableFile(countfile); }
301                         
302             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
303                         
304                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
305                         m->mothurConvert(temp, qThreshold);
306                         
307                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
308                         qtrim = m->isTrue(temp);
309
310                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
311                         convert(temp, qRollAverage);
312
313                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
314                         convert(temp, qWindowAverage);
315
316                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
317                         convert(temp, qWindowSize);
318
319                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
320                         convert(temp, qWindowStep);
321
322                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
323                         convert(temp, qAverage);
324
325                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
326                         convert(temp, keepFirst);
327
328                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
329                         convert(temp, removeLast);
330                         
331                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
332                         allFiles = m->isTrue(temp);
333             
334             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
335                         keepforward = m->isTrue(temp);
336                         
337                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
338                         m->setProcessors(temp);
339                         m->mothurConvert(temp, processors); 
340                         
341                         
342                         if(allFiles && (oligoFile == "")){
343                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
344                         }
345                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
346                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
347                                 qAverage=0;
348                                 qThreshold=0;
349                         }
350                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
351                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
352                                 abort = true;
353                         }
354                         
355             if (countfile == "") {
356                 if (nameFile == "") {
357                     vector<string> files; files.push_back(fastaFile);
358                     parser.getNameFile(files);
359                 }
360             }
361                 }
362
363         }
364         catch(exception& e) {
365                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
366                 exit(1);
367         }
368 }
369 //***************************************************************************************************************
370
371 int TrimSeqsCommand::execute(){
372         try{
373         
374                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
375                 
376                 numFPrimers = 0;  //this needs to be initialized
377                 numRPrimers = 0;
378         numSpacers = 0;
379         numLinkers = 0;
380                 createGroup = false;
381                 vector<vector<string> > fastaFileNames;
382                 vector<vector<string> > qualFileNames;
383                 vector<vector<string> > nameFileNames;
384                 
385                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
386                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
387                 
388                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
389                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
390                 
391                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
392                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
393                 
394                 if (qFileName != "") {
395                         outputNames.push_back(trimQualFile);
396                         outputNames.push_back(scrapQualFile);
397                         outputTypes["qfile"].push_back(trimQualFile);
398                         outputTypes["qfile"].push_back(scrapQualFile); 
399                 }
400                 
401                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
402                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
403                 
404                 if (nameFile != "") {
405                         m->readNames(nameFile, nameMap);
406                         outputNames.push_back(trimNameFile);
407                         outputNames.push_back(scrapNameFile);
408                         outputTypes["name"].push_back(trimNameFile);
409                         outputTypes["name"].push_back(scrapNameFile); 
410                 }
411         
412         string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count");
413                 string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count");
414                 
415                 if (countfile != "") {
416             CountTable ct;
417             ct.readTable(countfile);
418             nameCount = ct.getNameMap();
419                         outputNames.push_back(trimCountFile);
420                         outputNames.push_back(scrapCountFile);
421                         outputTypes["count"].push_back(trimCountFile);
422                         outputTypes["count"].push_back(scrapCountFile); 
423                 }
424
425                 
426                 if (m->control_pressed) { return 0; }
427                 
428                 string outputGroupFileName;
429                 if(oligoFile != ""){
430                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
431                         if ((createGroup) && (countfile == "")){
432                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
433                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
434                         }
435                 }
436        
437         //fills lines and qlines
438                 setLines(fastaFile, qFileName);
439                 
440         if(processors == 1){
441             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
442         }else{
443             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
444         }       
445                 
446                 
447                 if (m->control_pressed) {  return 0; }                  
448         
449                 if(allFiles){
450                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
451                         map<string, string>::iterator it;
452                         set<string> namesToRemove;
453                         for(int i=0;i<fastaFileNames.size();i++){
454                                 for(int j=0;j<fastaFileNames[0].size();j++){
455                                         if (fastaFileNames[i][j] != "") {
456                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
457                                                         if(m->isBlank(fastaFileNames[i][j])){
458                                                                 m->mothurRemove(fastaFileNames[i][j]);
459                                                                 namesToRemove.insert(fastaFileNames[i][j]);
460                                                         
461                                                                 if(qFileName != ""){
462                                                                         m->mothurRemove(qualFileNames[i][j]);
463                                                                         namesToRemove.insert(qualFileNames[i][j]);
464                                                                 }
465                                                                 
466                                                                 if(nameFile != ""){
467                                                                         m->mothurRemove(nameFileNames[i][j]);
468                                                                         namesToRemove.insert(nameFileNames[i][j]);
469                                                                 }
470                                                         }else{  
471                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
472                                                                 if (it == uniqueFastaNames.end()) {     
473                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
474                                                                 }       
475                                                         }
476                                                 }
477                                         }
478                                 }
479                         }
480                         
481                         //remove names for outputFileNames, just cleans up the output
482                         vector<string> outputNames2;
483                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
484                         outputNames = outputNames2;
485                         
486             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
487                 ifstream in;
488                 m->openInputFile(it->first, in);
489                 
490                 ofstream out;
491                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
492                 if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
493                 else {  thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
494                 m->openOutputFile(thisGroupName, out);
495                 
496                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
497                 
498                 while (!in.eof()){
499                     if (m->control_pressed) { break; }
500                     
501                     Sequence currSeq(in); m->gobble(in);
502                     if (countfile == "") {  
503                         out << currSeq.getName() << '\t' << it->second << endl;  
504                         
505                         if (nameFile != "") {
506                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
507                             if (itName != nameMap.end()) { 
508                                 vector<string> thisSeqsNames; 
509                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
510                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
511                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
512                                 }
513                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
514                         }
515                     }else { 
516                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
517                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
518                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
519                     }
520                 }
521                 in.close();
522                 out.close();
523             }
524             
525             if (countfile != "") { //create countfile with group info included
526                 CountTable* ct = new CountTable();
527                 ct->readTable(trimCountFile);
528                 map<string, int> justTrimmedNames = ct->getNameMap();
529                 delete ct;
530                 
531                 CountTable newCt;
532                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
533                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
534                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
535                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
536                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
537                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
538                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
539                 }
540                 newCt.printTable(trimCountFile);
541             }
542                 }
543                 
544                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
545
546                 //output group counts
547                 m->mothurOutEndLine();
548                 int total = 0;
549                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
550                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
551                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
552                 }
553                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
554                 
555                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
556
557                 //set fasta file as new current fastafile
558                 string current = "";
559                 itTypes = outputTypes.find("fasta");
560                 if (itTypes != outputTypes.end()) {
561                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
562                 }
563                 
564                 itTypes = outputTypes.find("name");
565                 if (itTypes != outputTypes.end()) {
566                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
567                 }
568                 
569                 itTypes = outputTypes.find("qfile");
570                 if (itTypes != outputTypes.end()) {
571                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
572                 }
573                 
574                 itTypes = outputTypes.find("group");
575                 if (itTypes != outputTypes.end()) {
576                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
577                 }
578         
579         itTypes = outputTypes.find("count");
580                 if (itTypes != outputTypes.end()) {
581                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
582                 }
583
584                 m->mothurOutEndLine();
585                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
586                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
587                 m->mothurOutEndLine();
588                 
589                 return 0;       
590                         
591         }
592         catch(exception& e) {
593                 m->errorOut(e, "TrimSeqsCommand", "execute");
594                 exit(1);
595         }
596 }
597                 
598 /**************************************************************************************/
599 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
600                 
601         try {
602                 
603                 ofstream trimFASTAFile;
604                 m->openOutputFile(trimFileName, trimFASTAFile);
605                 
606                 ofstream scrapFASTAFile;
607                 m->openOutputFile(scrapFileName, scrapFASTAFile);
608                 
609                 ofstream trimQualFile;
610                 ofstream scrapQualFile;
611                 if(qFileName != ""){
612                         m->openOutputFile(trimQFileName, trimQualFile);
613                         m->openOutputFile(scrapQFileName, scrapQualFile);
614                 }
615                 
616                 ofstream trimNameFile;
617                 ofstream scrapNameFile;
618                 if(nameFile != ""){
619                         m->openOutputFile(trimNFileName, trimNameFile);
620                         m->openOutputFile(scrapNFileName, scrapNameFile);
621                 }
622                 
623         ofstream trimCountFile;
624                 ofstream scrapCountFile;
625                 if(countfile != ""){
626                         m->openOutputFile(trimCFileName, trimCountFile);
627                         m->openOutputFile(scrapCFileName, scrapCountFile);
628             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
629                 }
630                 
631                 ofstream outGroupsFile;
632                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
633                 if(allFiles){
634                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
635                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
636                                         if (fastaFileNames[i][j] != "") {
637                                                 ofstream temp;
638                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
639                                                 if(qFileName != ""){
640                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
641                                                 }
642                                                 
643                                                 if(nameFile != ""){
644                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
645                                                 }
646                                         }
647                                 }
648                         }
649                 }
650                 
651                 ifstream inFASTA;
652                 m->openInputFile(filename, inFASTA);
653                 inFASTA.seekg(line.start);
654                 
655                 ifstream qFile;
656                 if(qFileName != "")     {
657                         m->openInputFile(qFileName, qFile);
658                         qFile.seekg(qline.start);  
659                 }
660                 
661                 int count = 0;
662                 bool moreSeqs = 1;
663                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
664         
665                 while (moreSeqs) {
666                                 
667                         if (m->control_pressed) { 
668                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
669                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
670                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
671                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
672                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
673                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
674                         }
675                         
676                         int success = 1;
677                         string trashCode = "";
678                         int currentSeqsDiffs = 0;
679
680                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
681                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
682             
683                         QualityScores currQual;
684                         if(qFileName != ""){
685                                 currQual = QualityScores(qFile);  m->gobble(qFile);
686                 //cout << currQual.getName() << endl;
687                         }
688                         
689                         string origSeq = currSeq.getUnaligned();
690                         if (origSeq != "") {
691                                 
692                                 int barcodeIndex = 0;
693                                 int primerIndex = 0;
694                                 
695                 if(numLinkers != 0){
696                                         success = trimOligos.stripLinker(currSeq, currQual);
697                                         if(success > ldiffs)            {       trashCode += 'k';       }
698                                         else{ currentSeqsDiffs += success;  }
699
700                                 }
701                 
702                                 if(barcodes.size() != 0){
703                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
704                                         if(success > bdiffs)            {       trashCode += 'b';       }
705                                         else{ currentSeqsDiffs += success;  }
706                                 }
707                                 
708                 if(numSpacers != 0){
709                                         success = trimOligos.stripSpacer(currSeq, currQual);
710                                         if(success > sdiffs)            {       trashCode += 's';       }
711                                         else{ currentSeqsDiffs += success;  }
712
713                                 }
714                 
715                                 if(numFPrimers != 0){
716                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
717                                         if(success > pdiffs)            {       trashCode += 'f';       }
718                                         else{ currentSeqsDiffs += success;  }
719                                 }
720                                 
721                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
722                                 
723                                 if(numRPrimers != 0){
724                                         success = trimOligos.stripReverse(currSeq, currQual);
725                                         if(!success)                            {       trashCode += 'r';       }
726                                 }
727
728                                 if(keepFirst != 0){
729                                         success = keepFirstTrim(currSeq, currQual);
730                                 }
731                                 
732                                 if(removeLast != 0){
733                                         success = removeLastTrim(currSeq, currQual);
734                                         if(!success)                            {       trashCode += 'l';       }
735                                 }
736
737                                 
738                                 if(qFileName != ""){
739                                         int origLength = currSeq.getNumBases();
740                                         
741                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
742                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
743                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
744                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
745                                         else                                            {       success = 1;                            }
746                                         
747                                         //you don't want to trim, if it fails above then scrap it
748                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
749                                         
750                                         if(!success)                            {       trashCode += 'q';       }
751                                 }                               
752                 
753                                 if(minLength > 0 || maxLength > 0){
754                                         success = cullLength(currSeq);
755                                         if(!success)                            {       trashCode += 'l';       }
756                                 }
757                                 if(maxHomoP > 0){
758                                         success = cullHomoP(currSeq);
759                                         if(!success)                            {       trashCode += 'h';       }
760                                 }
761                                 if(maxAmbig != -1){
762                                         success = cullAmbigs(currSeq);
763                                         if(!success)                            {       trashCode += 'n';       }
764                                 }
765                                 
766                                 if(flip){               // should go last                       
767                                         currSeq.reverseComplement();
768                                         if(qFileName != ""){
769                                                 currQual.flipQScores(); 
770                                         }
771                                 }
772                                 
773                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
774                 
775                                 if(trashCode.length() == 0){
776                                         currSeq.setAligned(currSeq.getUnaligned());
777                                         currSeq.printSequence(trimFASTAFile);
778                                         
779                                         if(qFileName != ""){
780                                                 currQual.printQScores(trimQualFile);
781                                         }
782                                         
783                     
784                                         if(nameFile != ""){
785                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
786                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
787                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
788                                         }
789                     
790                     int numRedundants = 0;
791                     if (countfile != "") {
792                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
793                         if (itCount != nameCount.end()) { 
794                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
795                             numRedundants = itCount->second-1;
796                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
797                     }
798                                         
799                                         if (createGroup) {
800                                                 if(barcodes.size() != 0){
801                                                         string thisGroup = barcodeNameVector[barcodeIndex];
802                                                         if (primers.size() != 0) { 
803                                                                 if (primerNameVector[primerIndex] != "") { 
804                                                                         if(thisGroup != "") {
805                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
806                                                                         }else {
807                                                                                 thisGroup = primerNameVector[primerIndex]; 
808                                                                         }
809                                                                 } 
810                                                         }
811                                                         
812                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
813                             
814                                                         if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
815                             else {   groupMap[currSeq.getName()] = thisGroup; }
816                                                         
817                                                         if (nameFile != "") {
818                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
819                                                                 if (itName != nameMap.end()) { 
820                                                                         vector<string> thisSeqsNames; 
821                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
822                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
823                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
824                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
825                                                                         }
826                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
827                                                         }
828                                                         
829                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
830                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
831                                                         else { groupCounts[it->first] += (1 + numRedundants); }
832                                                                 
833                                                 }
834                                         }
835                                         
836                                         if(allFiles){
837                                                 ofstream output;
838                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
839                                                 currSeq.printSequence(output);
840                                                 output.close();
841                                                 
842                                                 if(qFileName != ""){
843                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
844                                                         currQual.printQScores(output);
845                                                         output.close();                                                 
846                                                 }
847                                                 
848                                                 if(nameFile != ""){
849                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
850                                                         if (itName != nameMap.end()) { 
851                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
852                                                                 output << itName->first << '\t' << itName->second << endl; 
853                                                                 output.close();
854                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
855                                                 }
856                                         }
857                                 }
858                                 else{
859                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
860                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
861                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
862                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
863                                         }
864                     if (countfile != "") {
865                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
866                         if (itCount != nameCount.end()) { 
867                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
868                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
869                     }
870                     
871                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
872                                         currSeq.setUnaligned(origSeq);
873                                         currSeq.setAligned(origSeq);
874                                         currSeq.printSequence(scrapFASTAFile);
875                                         if(qFileName != ""){
876                                                 currQual.printQScores(scrapQualFile);
877                                         }
878                                 }
879                                 count++;
880                         }
881                         
882                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
883                                 unsigned long long pos = inFASTA.tellg();
884                                 if ((pos == -1) || (pos >= line.end)) { break; }
885                         
886                         #else
887                                 if (inFASTA.eof()) { break; }
888                         #endif
889                         
890                         //report progress
891                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
892                         
893                 }
894                 //report progress
895                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
896                 
897                 
898                 inFASTA.close();
899                 trimFASTAFile.close();
900                 scrapFASTAFile.close();
901                 if (createGroup) {       outGroupsFile.close();   }
902                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
903                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
904         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
905                 
906                 return count;
907         }
908         catch(exception& e) {
909                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
910                 exit(1);
911         }
912 }
913
914 /**************************************************************************************************/
915
916 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
917         try {
918         
919         int process = 1;
920                 int exitCommand = 1;
921                 processIDS.clear();
922                 
923 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
924                                 //loop through and create all the processes you want
925                 while (process != processors) {
926                         int pid = fork();
927                         
928                         if (pid > 0) {
929                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
930                                 process++;
931                         }else if (pid == 0){
932                                 
933                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
934                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
935                                 vector<vector<string> > tempNameFileNames = nameFileNames;
936
937                                 if(allFiles){
938                                         ofstream temp;
939
940                                         for(int i=0;i<tempFASTAFileNames.size();i++){
941                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
942                                                         if (tempFASTAFileNames[i][j] != "") {
943                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
944                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
945
946                                                                 if(qFileName != ""){
947                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
948                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
949                                                                 }
950                                                                 if(nameFile != ""){
951                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
952                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
953                                                                 }
954                                                         }
955                                                 }
956                                         }
957                                 }
958                                                         
959                                 driverCreateTrim(filename,
960                                                                  qFileName,
961                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
962                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
963                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
964                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
965                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
966                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
967                                  (trimCountFileName + toString(getpid()) + ".temp"),
968                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
969                                                                  (groupFile + toString(getpid()) + ".temp"),
970                                                                  tempFASTAFileNames,
971                                                                  tempPrimerQualFileNames,
972                                                                  tempNameFileNames,
973                                                                  lines[process],
974                                                                  qLines[process]);
975                 
976                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
977                                 
978                                 //pass groupCounts to parent
979                                 if(createGroup){
980                                         ofstream out;
981                                         string tempFile = filename + toString(getpid()) + ".num.temp";
982                                         m->openOutputFile(tempFile, out);
983                                         
984                                         out << groupCounts.size() << endl;
985                                         
986                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
987                                                 out << it->first << '\t' << it->second << endl;
988                                         }
989                     
990                     out << groupMap.size() << endl;
991                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
992                                                 out << it->first << '\t' << it->second << endl;
993                                         }
994                                         out.close();
995                                 }
996                                 exit(0);
997                         }else { 
998                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
999                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1000                                 exit(0);
1001                         }
1002                 }
1003                 
1004                 //parent do my part
1005                 ofstream temp;
1006                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1007                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1008                 if(qFileName != ""){
1009                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1010                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1011                 }
1012                 if (nameFile != "") {
1013                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1014                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1015                 }
1016         if (countfile != "") {
1017                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1018                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1019                 }
1020
1021                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1022                 
1023                 //force parent to wait until all the processes are done
1024                 for (int i=0;i<processIDS.size();i++) { 
1025                         int temp = processIDS[i];
1026                         wait(&temp);
1027                 }
1028 #else
1029         //////////////////////////////////////////////////////////////////////////////////////////////////////
1030                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1031                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1032                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1033                 
1034                 vector<trimData*> pDataArray; 
1035                 DWORD   dwThreadIdArray[processors-1];
1036                 HANDLE  hThreadArray[processors-1]; 
1037                 
1038                 //Create processor worker threads.
1039                 for( int h=0; h<processors-1; h++){
1040                         
1041             string extension = "";
1042                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1043             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1044             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1045             vector<vector<string> > tempNameFileNames = nameFileNames;
1046             
1047             if(allFiles){
1048                 ofstream temp;
1049                 
1050                 for(int i=0;i<tempFASTAFileNames.size();i++){
1051                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1052                         if (tempFASTAFileNames[i][j] != "") {
1053                             tempFASTAFileNames[i][j] += extension;
1054                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1055                             
1056                             if(qFileName != ""){
1057                                 tempPrimerQualFileNames[i][j] += extension;
1058                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1059                             }
1060                             if(nameFile != ""){
1061                                 tempNameFileNames[i][j] += extension;
1062                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1063                             }
1064                         }
1065                     }
1066                 }
1067             }
1068
1069             
1070                         trimData* tempTrim = new trimData(filename,
1071                                               qFileName, nameFile, countfile,
1072                                               (trimFASTAFileName+extension),
1073                                               (scrapFASTAFileName+extension),
1074                                               (trimQualFileName+extension),
1075                                               (scrapQualFileName+extension),
1076                                               (trimNameFileName+extension),
1077                                               (scrapNameFileName+extension),
1078                                               (trimCountFileName+extension),
1079                                               (scrapCountFileName+extension),
1080                                               (groupFile+extension),
1081                                               tempFASTAFileNames,
1082                                               tempPrimerQualFileNames,
1083                                               tempNameFileNames,
1084                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1085                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
1086                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1087                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1088                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
1089                         pDataArray.push_back(tempTrim);
1090             
1091                         hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
1092                 }
1093         
1094         //parent do my part
1095                 ofstream temp;
1096                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1097                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1098                 if(qFileName != ""){
1099                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1100                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1101                 }
1102                 if (nameFile != "") {
1103                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1104                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1105                 }
1106         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1107         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1108         vector<vector<string> > tempNameFileNames = nameFileNames;
1109         if(allFiles){
1110             ofstream temp;
1111             string extension = toString(processors-1) + ".temp";
1112             for(int i=0;i<tempFASTAFileNames.size();i++){
1113                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1114                     if (tempFASTAFileNames[i][j] != "") {
1115                         tempFASTAFileNames[i][j] += extension;
1116                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1117                         
1118                         if(qFileName != ""){
1119                             tempPrimerQualFileNames[i][j] += extension;
1120                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
1121                         }
1122                         if(nameFile != ""){
1123                             tempNameFileNames[i][j] += extension;
1124                             m->openOutputFile(tempNameFileNames[i][j], temp);           temp.close();
1125                         }
1126                     }
1127                 }
1128             }
1129         }
1130         
1131                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1132         processIDS.push_back(processors-1);
1133
1134         
1135                 //Wait until all threads have terminated.
1136                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1137                 
1138                 //Close all thread handles and free memory allocations.
1139                 for(int i=0; i < pDataArray.size(); i++){
1140                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1141                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1142                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1143                 else { groupCounts[it->first] += it->second; }
1144             }
1145             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1146                 map<string, string>::iterator it2 = groupMap.find(it->first);
1147                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1148                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1149             }
1150             CloseHandle(hThreadArray[i]);
1151                         delete pDataArray[i];
1152                 }
1153         
1154 #endif          
1155         
1156         
1157         //append files
1158                 for(int i=0;i<processIDS.size();i++){
1159                         
1160                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1161                         
1162                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1163                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1164                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1165                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1166                         
1167                         if(qFileName != ""){
1168                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1169                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1170                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1171                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1172                         }
1173                         
1174                         if(nameFile != ""){
1175                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1176                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1177                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1178                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1179                         }
1180             
1181             if(countfile != ""){
1182                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1183                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1184                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1185                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1186                         }
1187                         
1188                         if((createGroup)&&(countfile == "")){
1189                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1190                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1191                         }
1192                         
1193                         
1194                         if(allFiles){
1195                                 for(int j=0;j<fastaFileNames.size();j++){
1196                                         for(int k=0;k<fastaFileNames[j].size();k++){
1197                                                 if (fastaFileNames[j][k] != "") {
1198                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1199                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1200                                                         
1201                                                         if(qFileName != ""){
1202                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1203                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1204                                                         }
1205                                                         
1206                                                         if(nameFile != ""){
1207                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1208                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1209                                                         }
1210                                                 }
1211                                         }
1212                                 }
1213                         }
1214                         
1215             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1216                         if(createGroup){
1217                                 ifstream in;
1218                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1219                                 m->openInputFile(tempFile, in);
1220                                 int tempNum;
1221                                 string group;
1222                                 
1223                                 in >> tempNum; m->gobble(in);
1224                                 
1225                                 if (tempNum != 0) {
1226                                         for (int i = 0; i < tempNum; i++) { 
1227                         int groupNum;
1228                                                 in >> group >> groupNum; m->gobble(in);
1229                         
1230                                                 map<string, int>::iterator it = groupCounts.find(group);
1231                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1232                                                 else { groupCounts[it->first] += groupNum; }
1233                                         }
1234                                 }
1235                 in >> tempNum; m->gobble(in);
1236                 if (tempNum != 0) {
1237                                         for (int i = 0; i < tempNum; i++) { 
1238                         string group, seqName;
1239                                                 in >> seqName >> group; m->gobble(in);
1240                         
1241                                                 map<string, string>::iterator it = groupMap.find(seqName);
1242                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1243                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1244                                         }
1245                                 }
1246                 
1247                                 in.close(); m->mothurRemove(tempFile);
1248                         }
1249             #endif
1250                 }
1251
1252         return exitCommand;
1253         }
1254         catch(exception& e) {
1255                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1256                 exit(1);
1257         }
1258 }
1259
1260 /**************************************************************************************************/
1261
1262 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1263         try {
1264         
1265         vector<unsigned long long> fastaFilePos;
1266                 vector<unsigned long long> qfileFilePos;
1267                 
1268                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1269                 //set file positions for fasta file
1270                 fastaFilePos = m->divideFile(filename, processors);
1271                 
1272                 //get name of first sequence in each chunk
1273                 map<string, int> firstSeqNames;
1274                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1275                         ifstream in;
1276                         m->openInputFile(filename, in);
1277                         in.seekg(fastaFilePos[i]);
1278                 
1279                         Sequence temp(in); 
1280                         firstSeqNames[temp.getName()] = i;
1281                 
1282                         in.close();
1283                 }
1284                 
1285                 if(qfilename != "")     {
1286             //seach for filePos of each first name in the qfile and save in qfileFilePos
1287             ifstream inQual;
1288             m->openInputFile(qfilename, inQual);
1289             
1290             string input;
1291             while(!inQual.eof()){       
1292                 input = m->getline(inQual);
1293                 
1294                 if (input.length() != 0) {
1295                     if(input[0] == '>'){ //this is a sequence name line
1296                         istringstream nameStream(input);
1297                         
1298                         string sname = "";  nameStream >> sname;
1299                         sname = sname.substr(1);
1300                         
1301                         map<string, int>::iterator it = firstSeqNames.find(sname);
1302                         
1303                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1304                             unsigned long long pos = inQual.tellg(); 
1305                             qfileFilePos.push_back(pos - input.length() - 1);   
1306                             firstSeqNames.erase(it);
1307                         }
1308                     }
1309                 }
1310                 
1311                 if (firstSeqNames.size() == 0) { break; }
1312             }
1313             inQual.close();
1314             
1315             
1316             if (firstSeqNames.size() != 0) { 
1317                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1318                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1319                 }
1320                 qFileName = "";
1321                 return processors;
1322             }
1323             
1324             //get last file position of qfile
1325             FILE * pFile;
1326             unsigned long long size;
1327             
1328             //get num bytes in file
1329             pFile = fopen (qfilename.c_str(),"rb");
1330             if (pFile==NULL) perror ("Error opening file");
1331             else{
1332                 fseek (pFile, 0, SEEK_END);
1333                 size=ftell (pFile);
1334                 fclose (pFile);
1335             }
1336             
1337             qfileFilePos.push_back(size);
1338         }
1339         
1340         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1341             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1342                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1343                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1344                 }       
1345                 if(qfilename == "")     {       qLines = lines; } //files with duds
1346                 
1347                 return processors;
1348                 
1349                 #else
1350             
1351         if (processors == 1) { //save time
1352                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1353                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1354             lines.push_back(linePair(0, 1000));
1355             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1356         }else{
1357             int numFastaSeqs = 0;
1358             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1359             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1360         
1361             if (qfilename != "") { 
1362                 int numQualSeqs = 0;
1363                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1364                 
1365                 if (numFastaSeqs != numQualSeqs) {
1366                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1367                 }
1368             }
1369         
1370             //figure out how many sequences you have to process
1371             int numSeqsPerProcessor = numFastaSeqs / processors;
1372             for (int i = 0; i < processors; i++) {
1373                 int startIndex =  i * numSeqsPerProcessor;
1374                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1375                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1376                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1377             }
1378         }
1379             if(qfilename == "") {       qLines = lines; } //files with duds
1380                         return 1;
1381                 
1382                 #endif
1383         }
1384         catch(exception& e) {
1385                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1386                 exit(1);
1387         }
1388 }
1389
1390 //***************************************************************************************************************
1391
1392 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1393         try {
1394                 ifstream inOligos;
1395                 m->openInputFile(oligoFile, inOligos);
1396                 
1397                 ofstream test;
1398                 
1399                 string type, oligo, group;
1400
1401                 int indexPrimer = 0;
1402                 int indexBarcode = 0;
1403                 
1404                 while(!inOligos.eof()){
1405
1406                         inOligos >> type; 
1407             
1408                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1409             
1410                         if(type[0] == '#'){
1411                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1412                                 m->gobble(inOligos);
1413                         }
1414                         else{
1415                                 m->gobble(inOligos);
1416                                 //make type case insensitive
1417                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1418                                 
1419                                 inOligos >> oligo;
1420                 
1421                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1422                                 
1423                                 for(int i=0;i<oligo.length();i++){
1424                                         oligo[i] = toupper(oligo[i]);
1425                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1426                                 }
1427                                 
1428                                 if(type == "FORWARD"){
1429                                         group = "";
1430                                         
1431                                         // get rest of line in case there is a primer name
1432                                         while (!inOligos.eof()) {       
1433                                                 char c = inOligos.get(); 
1434                                                 if (c == 10 || c == 13){        break;  }
1435                                                 else if (c == 32 || c == 9){;} //space or tab
1436                                                 else {  group += c;  }
1437                                         } 
1438                                         
1439                                         //check for repeat barcodes
1440                                         map<string, int>::iterator itPrime = primers.find(oligo);
1441                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1442                                         
1443                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1444                     
1445                                         primers[oligo]=indexPrimer; indexPrimer++;              
1446                                         primerNameVector.push_back(group);
1447                                 }
1448                                 else if(type == "REVERSE"){
1449                                         //Sequence oligoRC("reverse", oligo);
1450                                         //oligoRC.reverseComplement();
1451                     string oligoRC = reverseOligo(oligo);
1452                                         revPrimer.push_back(oligoRC);
1453                                 }
1454                                 else if(type == "BARCODE"){
1455                                         inOligos >> group;
1456                                         
1457                                         //check for repeat barcodes
1458                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1459                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1460                     
1461                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1462                                         barcodeNameVector.push_back(group);
1463                                 }else if(type == "LINKER"){
1464                                         linker.push_back(oligo);
1465                                 }else if(type == "SPACER"){
1466                                         spacer.push_back(oligo);
1467                                 }
1468                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1469                         }
1470                         m->gobble(inOligos);
1471                 }       
1472                 inOligos.close();
1473                 
1474                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1475                 
1476                 //add in potential combos
1477                 if(barcodeNameVector.size() == 0){
1478                         barcodes[""] = 0;
1479                         barcodeNameVector.push_back("");                        
1480                 }
1481                 
1482                 if(primerNameVector.size() == 0){
1483                         primers[""] = 0;
1484                         primerNameVector.push_back("");                 
1485                 }
1486                 
1487                 fastaFileNames.resize(barcodeNameVector.size());
1488                 for(int i=0;i<fastaFileNames.size();i++){
1489                         fastaFileNames[i].assign(primerNameVector.size(), "");
1490                 }
1491                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1492                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1493                 
1494                 if(allFiles){
1495                         set<string> uniqueNames; //used to cleanup outputFileNames
1496                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1497                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1498                                         
1499                                         string primerName = primerNameVector[itPrimer->second];
1500                                         string barcodeName = barcodeNameVector[itBar->second];
1501                                         
1502                                         string comboGroupName = "";
1503                                         string fastaFileName = "";
1504                                         string qualFileName = "";
1505                                         string nameFileName = "";
1506                     string countFileName = "";
1507                                         
1508                                         if(primerName == ""){
1509                                                 comboGroupName = barcodeNameVector[itBar->second];
1510                                         }
1511                                         else{
1512                                                 if(barcodeName == ""){
1513                                                         comboGroupName = primerNameVector[itPrimer->second];
1514                                                 }
1515                                                 else{
1516                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1517                                                 }
1518                                         }
1519                                         
1520                                         
1521                                         ofstream temp;
1522                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1523                                         if (uniqueNames.count(fastaFileName) == 0) {
1524                                                 outputNames.push_back(fastaFileName);
1525                                                 outputTypes["fasta"].push_back(fastaFileName);
1526                                                 uniqueNames.insert(fastaFileName);
1527                                         }
1528                                         
1529                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1530                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1531                                         
1532                                         if(qFileName != ""){
1533                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1534                                                 if (uniqueNames.count(qualFileName) == 0) {
1535                                                         outputNames.push_back(qualFileName);
1536                                                         outputTypes["qfile"].push_back(qualFileName);
1537                                                 }
1538                                                 
1539                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1540                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1541                                         }
1542                                         
1543                                         if(nameFile != ""){
1544                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1545                                                 if (uniqueNames.count(nameFileName) == 0) {
1546                                                         outputNames.push_back(nameFileName);
1547                                                         outputTypes["name"].push_back(nameFileName);
1548                                                 }
1549                                                 
1550                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1551                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1552                                         }
1553                                 }
1554                         }
1555                 }
1556                 numFPrimers = primers.size();
1557                 numRPrimers = revPrimer.size();
1558         numLinkers = linker.size();
1559         numSpacers = spacer.size();
1560                 
1561                 bool allBlank = true;
1562                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1563                         if (barcodeNameVector[i] != "") {
1564                                 allBlank = false;
1565                                 break;
1566                         }
1567                 }
1568                 for (int i = 0; i < primerNameVector.size(); i++) {
1569                         if (primerNameVector[i] != "") {
1570                                 allBlank = false;
1571                                 break;
1572                         }
1573                 }
1574
1575                 if (allBlank) {
1576                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1577                         allFiles = false;
1578                         return false;
1579                 }
1580                 
1581                 return true;
1582                 
1583         }
1584         catch(exception& e) {
1585                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1586                 exit(1);
1587         }
1588 }
1589 //***************************************************************************************************************
1590
1591 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1592         try {
1593                 bool success = 1;
1594                 if(qscores.getName() != ""){
1595                         qscores.trimQScores(-1, keepFirst);
1596                 }
1597                 sequence.trim(keepFirst);
1598                 return success;
1599         }
1600         catch(exception& e) {
1601                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1602                 exit(1);
1603         }
1604         
1605 }       
1606
1607 //***************************************************************************************************************
1608
1609 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1610         try {
1611                 bool success = 0;
1612                 
1613                 int length = sequence.getNumBases() - removeLast;
1614                 
1615                 if(length > 0){
1616                         if(qscores.getName() != ""){
1617                                 qscores.trimQScores(-1, length);
1618                         }
1619                         sequence.trim(length);
1620                         success = 1;
1621                 }
1622                 else{
1623                         success = 0;
1624                 }
1625
1626                 return success;
1627         }
1628         catch(exception& e) {
1629                 m->errorOut(e, "removeLastTrim", "countDiffs");
1630                 exit(1);
1631         }
1632         
1633 }       
1634
1635 //***************************************************************************************************************
1636
1637 bool TrimSeqsCommand::cullLength(Sequence& seq){
1638         try {
1639         
1640                 int length = seq.getNumBases();
1641                 bool success = 0;       //guilty until proven innocent
1642                 
1643                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1644                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1645                 else                                                                                            {       success = 0;    }
1646                 
1647                 return success;
1648         
1649         }
1650         catch(exception& e) {
1651                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1652                 exit(1);
1653         }
1654         
1655 }
1656
1657 //***************************************************************************************************************
1658
1659 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1660         try {
1661                 int longHomoP = seq.getLongHomoPolymer();
1662                 bool success = 0;       //guilty until proven innocent
1663                 
1664                 if(longHomoP <= maxHomoP){      success = 1;    }
1665                 else                                    {       success = 0;    }
1666                 
1667                 return success;
1668         }
1669         catch(exception& e) {
1670                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1671                 exit(1);
1672         }
1673         
1674 }
1675 //********************************************************************/
1676 string TrimSeqsCommand::reverseOligo(string oligo){
1677         try {
1678         string reverse = "";
1679         
1680         for(int i=oligo.length()-1;i>=0;i--){
1681             
1682             if(oligo[i] == 'A')         {       reverse += 'T'; }
1683             else if(oligo[i] == 'T'){   reverse += 'A'; }
1684             else if(oligo[i] == 'U'){   reverse += 'A'; }
1685             
1686             else if(oligo[i] == 'G'){   reverse += 'C'; }
1687             else if(oligo[i] == 'C'){   reverse += 'G'; }
1688             
1689             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1690             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1691             
1692             else if(oligo[i] == 'M'){   reverse += 'K'; }
1693             else if(oligo[i] == 'K'){   reverse += 'M'; }
1694             
1695             else if(oligo[i] == 'W'){   reverse += 'W'; }
1696             else if(oligo[i] == 'S'){   reverse += 'S'; }
1697             
1698             else if(oligo[i] == 'B'){   reverse += 'V'; }
1699             else if(oligo[i] == 'V'){   reverse += 'B'; }
1700             
1701             else if(oligo[i] == 'D'){   reverse += 'H'; }
1702             else if(oligo[i] == 'H'){   reverse += 'D'; }
1703             
1704             else                                                {       reverse += 'N'; }
1705         }
1706         
1707         
1708         return reverse;
1709     }
1710         catch(exception& e) {
1711                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1712                 exit(1);
1713         }
1714 }
1715
1716 //***************************************************************************************************************
1717
1718 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1719         try {
1720                 int numNs = seq.getAmbigBases();
1721                 bool success = 0;       //guilty until proven innocent
1722                 
1723                 if(numNs <= maxAmbig)   {       success = 1;    }
1724                 else                                    {       success = 0;    }
1725                 
1726                 return success;
1727         }
1728         catch(exception& e) {
1729                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1730                 exit(1);
1731         }
1732         
1733 }
1734 //***************************************************************************************************************