]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
25                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
26                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
27                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
28                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
29                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
30         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
31                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
32         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
34                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
35                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
36                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
37                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
38                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
39                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
40                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
41                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
42                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
43                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
44                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
45                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
46                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
47                         
48                 vector<string> myArray;
49                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string TrimSeqsCommand::getHelpString(){        
59         try {
60                 string helpString = "";
61                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
62                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
63                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
64                 helpString += "The fasta parameter is required.\n";
65                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
66                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
67                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
68         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
69                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
70                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
71                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
72                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
73                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
74                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
75                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
76         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
77                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
78                 helpString += "The qfile parameter allows you to provide a quality file.\n";
79                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
80                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
81                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
82                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
83                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
84                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
85                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
86                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
87                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
88                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
89                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
90                 helpString += "The trim.seqs command should be in the following format: \n";
91                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
92                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
93                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
95                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96                 return helpString;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 string TrimSeqsCommand::getOutputPattern(string type) {
105     try {
106         string pattern = "";
107         
108         if (type == "qfile") {  pattern = "[filename],[tag],qual"; } 
109         else if (type == "fasta") {  pattern = "[filename],[tag],fasta"; } 
110         else if (type == "group") {  pattern = "[filename],groups"; }
111         else if (type == "name") {  pattern = "[filename],[tag],names"; }
112         else if (type == "count") {  pattern = "[filename],[tag],count_table"; }
113         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
114         
115         return pattern;
116     }
117     catch(exception& e) {
118         m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
119         exit(1);
120     }
121 }
122 //**********************************************************************************************************************
123
124 TrimSeqsCommand::TrimSeqsCommand(){     
125         try {
126                 abort = true; calledHelp = true; 
127                 setParameters();
128                 vector<string> tempOutNames;
129                 outputTypes["fasta"] = tempOutNames;
130                 outputTypes["qfile"] = tempOutNames;
131                 outputTypes["group"] = tempOutNames;
132                 outputTypes["name"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141
142 TrimSeqsCommand::TrimSeqsCommand(string option)  {
143         try {
144                 
145                 abort = false; calledHelp = false;   
146                 comboStarts = 0;
147                 
148                 //allow user to run help
149                 if(option == "help") { help(); abort = true; calledHelp = true; }
150                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151                 
152                 else {
153                         vector<string> myArray = setParameters();
154                         
155                         OptionParser parser(option);
156                         map<string,string> parameters = parser.getParameters();
157                         
158                         ValidParameters validParameter;
159                         map<string,string>::iterator it;
160                         
161                         //check to make sure all parameters are valid for command
162                         for (it = parameters.begin(); it != parameters.end(); it++) { 
163                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
164                         }
165                         
166                         //initialize outputTypes
167                         vector<string> tempOutNames;
168                         outputTypes["fasta"] = tempOutNames;
169                         outputTypes["qfile"] = tempOutNames;
170                         outputTypes["group"] = tempOutNames;
171                         outputTypes["name"] = tempOutNames;
172             outputTypes["count"] = tempOutNames;
173                         
174                         //if the user changes the input directory command factory will send this info to us in the output parameter 
175                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
176                         if (inputDir == "not found"){   inputDir = "";          }
177                         else {
178                                 string path;
179                                 it = parameters.find("fasta");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
185                                 }
186                                 
187                                 it = parameters.find("oligos");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
193                                 }
194                                 
195                                 it = parameters.find("qfile");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
201                                 }
202                                 
203                                 it = parameters.find("name");
204                                 //user has given a template file
205                                 if(it != parameters.end()){ 
206                                         path = m->hasPath(it->second);
207                                         //if the user has not given a path then, add inputdir. else leave path alone.
208                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
209                                 }
210                 
211                 it = parameters.find("count");
212                                 //user has given a template file
213                                 if(it != parameters.end()){ 
214                                         path = m->hasPath(it->second);
215                                         //if the user has not given a path then, add inputdir. else leave path alone.
216                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
217                                 }
218                                 
219                         }
220
221                         
222                         //check for required parameters
223                         fastaFile = validParameter.validFile(parameters, "fasta", true);
224                         if (fastaFile == "not found") {                                 
225                                 fastaFile = m->getFastaFile(); 
226                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
227                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
228                         }else if (fastaFile == "not open") { abort = true; }    
229                         else { m->setFastaFile(fastaFile); }
230                         
231                         //if the user changes the output directory command factory will send this info to us in the output parameter 
232                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
233                                 outputDir = ""; 
234                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
235                         }
236                 
237                         
238                         //check for optional parameter and set defaults
239                         // ...at some point should added some additional type checking...
240                         string temp;
241                         temp = validParameter.validFile(parameters, "flip", false);
242                         if (temp == "not found")    {   flip = 0;       }
243                         else {  flip = m->isTrue(temp);         }
244                 
245                         temp = validParameter.validFile(parameters, "oligos", true);
246                         if (temp == "not found"){       oligoFile = "";         }
247                         else if(temp == "not open"){    abort = true;   } 
248                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
249                         
250                         
251                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
252                         m->mothurConvert(temp, maxAmbig);  
253
254                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
255                         m->mothurConvert(temp, maxHomoP);  
256
257                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
258                         m->mothurConvert(temp, minLength); 
259                         
260                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, maxLength);
262                         
263                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
264                         m->mothurConvert(temp, bdiffs);
265                         
266                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
267                         m->mothurConvert(temp, pdiffs);
268             
269             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
270                         m->mothurConvert(temp, ldiffs);
271             
272             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
273                         m->mothurConvert(temp, sdiffs);
274                         
275                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
276                         m->mothurConvert(temp, tdiffs);
277                         
278                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
279                         
280                         temp = validParameter.validFile(parameters, "qfile", true);     
281                         if (temp == "not found")        {       qFileName = "";         }
282                         else if(temp == "not open")     {       abort = true;           }
283                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
284                         
285                         temp = validParameter.validFile(parameters, "name", true);      
286                         if (temp == "not found")        {       nameFile = "";          }
287                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
288                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
289             
290             countfile = validParameter.validFile(parameters, "count", true);
291                         if (countfile == "not open") { abort = true; countfile = ""; }  
292                         else if (countfile == "not found") { countfile = ""; }
293                         else { m->setCountTableFile(countfile); }
294                         
295             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
296                         
297                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
298                         m->mothurConvert(temp, qThreshold);
299                         
300                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
301                         qtrim = m->isTrue(temp);
302
303                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
304                         convert(temp, qRollAverage);
305
306                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
307                         convert(temp, qWindowAverage);
308
309                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
310                         convert(temp, qWindowSize);
311
312                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
313                         convert(temp, qWindowStep);
314
315                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
316                         convert(temp, qAverage);
317
318                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
319                         convert(temp, keepFirst);
320
321                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
322                         convert(temp, removeLast);
323                         
324                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
325                         allFiles = m->isTrue(temp);
326             
327             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
328                         keepforward = m->isTrue(temp);
329                         
330                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
331                         m->setProcessors(temp);
332                         m->mothurConvert(temp, processors); 
333                         
334                         
335                         if(allFiles && (oligoFile == "")){
336                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
337                         }
338                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
339                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
340                                 qAverage=0;
341                                 qThreshold=0;
342                         }
343                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
344                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
345                                 abort = true;
346                         }
347                         
348             if (countfile == "") {
349                 if (nameFile == "") {
350                     vector<string> files; files.push_back(fastaFile);
351                     parser.getNameFile(files);
352                 }
353             }
354                 }
355
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
359                 exit(1);
360         }
361 }
362 //***************************************************************************************************************
363
364 int TrimSeqsCommand::execute(){
365         try{
366         
367                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
368                 
369                 numFPrimers = 0;  //this needs to be initialized
370                 numRPrimers = 0;
371         numSpacers = 0;
372         numLinkers = 0;
373                 createGroup = false;
374                 vector<vector<string> > fastaFileNames;
375                 vector<vector<string> > qualFileNames;
376                 vector<vector<string> > nameFileNames;
377                 
378         map<string, string> variables; 
379                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
380         variables["[tag]"] = "trim";
381                 string trimSeqFile = getOutputFileName("fasta",variables);
382         string trimQualFile = getOutputFileName("qfile",variables);
383                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
384         
385         variables["[tag]"] = "scrap";
386                 string scrapSeqFile = getOutputFileName("fasta",variables);
387         string scrapQualFile = getOutputFileName("qfile",variables);
388                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
389                 
390                 if (qFileName != "") {
391                         outputNames.push_back(trimQualFile);
392                         outputNames.push_back(scrapQualFile);
393                         outputTypes["qfile"].push_back(trimQualFile);
394                         outputTypes["qfile"].push_back(scrapQualFile); 
395                 }
396                 
397         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
398         variables["[tag]"] = "trim";
399                 string trimNameFile = getOutputFileName("name",variables);
400         variables["[tag]"] = "scrap";
401                 string scrapNameFile = getOutputFileName("name",variables);
402                 
403                 if (nameFile != "") {
404                         m->readNames(nameFile, nameMap);
405                         outputNames.push_back(trimNameFile);
406                         outputNames.push_back(scrapNameFile);
407                         outputTypes["name"].push_back(trimNameFile);
408                         outputTypes["name"].push_back(scrapNameFile); 
409                 }
410         
411         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
412         variables["[tag]"] = "trim";
413         string trimCountFile = getOutputFileName("count",variables);
414         variables["[tag]"] = "scrap";
415                 string scrapCountFile = getOutputFileName("count",variables);
416                 
417                 if (countfile != "") {
418             CountTable ct;
419             ct.readTable(countfile);
420             nameCount = ct.getNameMap();
421                         outputNames.push_back(trimCountFile);
422                         outputNames.push_back(scrapCountFile);
423                         outputTypes["count"].push_back(trimCountFile);
424                         outputTypes["count"].push_back(scrapCountFile); 
425                 }
426
427                 
428                 if (m->control_pressed) { return 0; }
429                 
430                 string outputGroupFileName;
431                 if(oligoFile != ""){
432                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
433                         if ((createGroup) && (countfile == "")){
434                 map<string, string> myvariables; 
435                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
436                                 outputGroupFileName = getOutputFileName("group",myvariables);
437                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
438                         }
439                 }
440        
441         //fills lines and qlines
442                 setLines(fastaFile, qFileName);
443                 
444         if(processors == 1){
445             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
446         }else{
447             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
448         }       
449                 
450                 
451                 if (m->control_pressed) {  return 0; }                  
452         
453                 if(allFiles){
454                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
455                         map<string, string>::iterator it;
456                         set<string> namesToRemove;
457                         for(int i=0;i<fastaFileNames.size();i++){
458                                 for(int j=0;j<fastaFileNames[0].size();j++){
459                                         if (fastaFileNames[i][j] != "") {
460                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
461                                                         if(m->isBlank(fastaFileNames[i][j])){
462                                                                 m->mothurRemove(fastaFileNames[i][j]);
463                                                                 namesToRemove.insert(fastaFileNames[i][j]);
464                                                         
465                                                                 if(qFileName != ""){
466                                                                         m->mothurRemove(qualFileNames[i][j]);
467                                                                         namesToRemove.insert(qualFileNames[i][j]);
468                                                                 }
469                                                                 
470                                                                 if(nameFile != ""){
471                                                                         m->mothurRemove(nameFileNames[i][j]);
472                                                                         namesToRemove.insert(nameFileNames[i][j]);
473                                                                 }
474                                                         }else{  
475                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
476                                                                 if (it == uniqueFastaNames.end()) {     
477                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
478                                                                 }       
479                                                         }
480                                                 }
481                                         }
482                                 }
483                         }
484                         
485                         //remove names for outputFileNames, just cleans up the output
486                         vector<string> outputNames2;
487                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
488                         outputNames = outputNames2;
489                         
490             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
491                 ifstream in;
492                 m->openInputFile(it->first, in);
493                 
494                 ofstream out;
495                 map<string, string> myvariables; 
496                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
497                 string thisGroupName = "";
498                 if (countfile == "") { thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
499                 else {  thisGroupName = getOutputFileName("count",variables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
500                 m->openOutputFile(thisGroupName, out);
501                 
502                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
503                 
504                 while (!in.eof()){
505                     if (m->control_pressed) { break; }
506                     
507                     Sequence currSeq(in); m->gobble(in);
508                     if (countfile == "") {  
509                         out << currSeq.getName() << '\t' << it->second << endl;  
510                         
511                         if (nameFile != "") {
512                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
513                             if (itName != nameMap.end()) { 
514                                 vector<string> thisSeqsNames; 
515                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
516                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
517                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
518                                 }
519                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
520                         }
521                     }else { 
522                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
523                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
524                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
525                     }
526                 }
527                 in.close();
528                 out.close();
529             }
530             
531             if (countfile != "") { //create countfile with group info included
532                 CountTable* ct = new CountTable();
533                 ct->readTable(trimCountFile);
534                 map<string, int> justTrimmedNames = ct->getNameMap();
535                 delete ct;
536                 
537                 CountTable newCt;
538                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
539                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
540                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
541                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
542                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
543                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
544                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
545                 }
546                 newCt.printTable(trimCountFile);
547             }
548                 }
549                 
550                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
551
552                 //output group counts
553                 m->mothurOutEndLine();
554                 int total = 0;
555                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
556                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
557                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
558                 }
559                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
560                 
561                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
562
563                 //set fasta file as new current fastafile
564                 string current = "";
565                 itTypes = outputTypes.find("fasta");
566                 if (itTypes != outputTypes.end()) {
567                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
568                 }
569                 
570                 itTypes = outputTypes.find("name");
571                 if (itTypes != outputTypes.end()) {
572                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
573                 }
574                 
575                 itTypes = outputTypes.find("qfile");
576                 if (itTypes != outputTypes.end()) {
577                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
578                 }
579                 
580                 itTypes = outputTypes.find("group");
581                 if (itTypes != outputTypes.end()) {
582                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
583                 }
584         
585         itTypes = outputTypes.find("count");
586                 if (itTypes != outputTypes.end()) {
587                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
588                 }
589
590                 m->mothurOutEndLine();
591                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
592                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
593                 m->mothurOutEndLine();
594                 
595                 return 0;       
596                         
597         }
598         catch(exception& e) {
599                 m->errorOut(e, "TrimSeqsCommand", "execute");
600                 exit(1);
601         }
602 }
603                 
604 /**************************************************************************************/
605 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
606                 
607         try {
608                 
609                 ofstream trimFASTAFile;
610                 m->openOutputFile(trimFileName, trimFASTAFile);
611                 
612                 ofstream scrapFASTAFile;
613                 m->openOutputFile(scrapFileName, scrapFASTAFile);
614                 
615                 ofstream trimQualFile;
616                 ofstream scrapQualFile;
617                 if(qFileName != ""){
618                         m->openOutputFile(trimQFileName, trimQualFile);
619                         m->openOutputFile(scrapQFileName, scrapQualFile);
620                 }
621                 
622                 ofstream trimNameFile;
623                 ofstream scrapNameFile;
624                 if(nameFile != ""){
625                         m->openOutputFile(trimNFileName, trimNameFile);
626                         m->openOutputFile(scrapNFileName, scrapNameFile);
627                 }
628                 
629         ofstream trimCountFile;
630                 ofstream scrapCountFile;
631                 if(countfile != ""){
632                         m->openOutputFile(trimCFileName, trimCountFile);
633                         m->openOutputFile(scrapCFileName, scrapCountFile);
634             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
635                 }
636                 
637                 ofstream outGroupsFile;
638                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
639                 if(allFiles){
640                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
641                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
642                                         if (fastaFileNames[i][j] != "") {
643                                                 ofstream temp;
644                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
645                                                 if(qFileName != ""){
646                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
647                                                 }
648                                                 
649                                                 if(nameFile != ""){
650                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
651                                                 }
652                                         }
653                                 }
654                         }
655                 }
656                 
657                 ifstream inFASTA;
658                 m->openInputFile(filename, inFASTA);
659                 inFASTA.seekg(line.start);
660                 
661                 ifstream qFile;
662                 if(qFileName != "")     {
663                         m->openInputFile(qFileName, qFile);
664                         qFile.seekg(qline.start);  
665                 }
666                 
667                 int count = 0;
668                 bool moreSeqs = 1;
669                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
670         
671                 while (moreSeqs) {
672                                 
673                         if (m->control_pressed) { 
674                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
675                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
676                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
677                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
678                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
679                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
680                         }
681                         
682                         int success = 1;
683                         string trashCode = "";
684                         int currentSeqsDiffs = 0;
685
686                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
687                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
688             
689                         QualityScores currQual;
690                         if(qFileName != ""){
691                                 currQual = QualityScores(qFile);  m->gobble(qFile);
692                 //cout << currQual.getName() << endl;
693                         }
694                         
695                         string origSeq = currSeq.getUnaligned();
696                         if (origSeq != "") {
697                                 
698                                 int barcodeIndex = 0;
699                                 int primerIndex = 0;
700                                 
701                 if(numLinkers != 0){
702                                         success = trimOligos.stripLinker(currSeq, currQual);
703                                         if(success > ldiffs)            {       trashCode += 'k';       }
704                                         else{ currentSeqsDiffs += success;  }
705
706                                 }
707                 
708                                 if(barcodes.size() != 0){
709                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
710                                         if(success > bdiffs)            {       trashCode += 'b';       }
711                                         else{ currentSeqsDiffs += success;  }
712                                 }
713                                 
714                 if(numSpacers != 0){
715                                         success = trimOligos.stripSpacer(currSeq, currQual);
716                                         if(success > sdiffs)            {       trashCode += 's';       }
717                                         else{ currentSeqsDiffs += success;  }
718
719                                 }
720                 
721                                 if(numFPrimers != 0){
722                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
723                                         if(success > pdiffs)            {       trashCode += 'f';       }
724                                         else{ currentSeqsDiffs += success;  }
725                                 }
726                                 
727                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
728                                 
729                                 if(numRPrimers != 0){
730                                         success = trimOligos.stripReverse(currSeq, currQual);
731                                         if(!success)                            {       trashCode += 'r';       }
732                                 }
733
734                                 if(keepFirst != 0){
735                                         success = keepFirstTrim(currSeq, currQual);
736                                 }
737                                 
738                                 if(removeLast != 0){
739                                         success = removeLastTrim(currSeq, currQual);
740                                         if(!success)                            {       trashCode += 'l';       }
741                                 }
742
743                                 
744                                 if(qFileName != ""){
745                                         int origLength = currSeq.getNumBases();
746                                         
747                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
748                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
749                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
750                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
751                                         else                                            {       success = 1;                            }
752                                         
753                                         //you don't want to trim, if it fails above then scrap it
754                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
755                                         
756                                         if(!success)                            {       trashCode += 'q';       }
757                                 }                               
758                 
759                                 if(minLength > 0 || maxLength > 0){
760                                         success = cullLength(currSeq);
761                                         if(!success)                            {       trashCode += 'l';       }
762                                 }
763                                 if(maxHomoP > 0){
764                                         success = cullHomoP(currSeq);
765                                         if(!success)                            {       trashCode += 'h';       }
766                                 }
767                                 if(maxAmbig != -1){
768                                         success = cullAmbigs(currSeq);
769                                         if(!success)                            {       trashCode += 'n';       }
770                                 }
771                                 
772                                 if(flip){               // should go last                       
773                                         currSeq.reverseComplement();
774                                         if(qFileName != ""){
775                                                 currQual.flipQScores(); 
776                                         }
777                                 }
778                                 
779                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
780                 
781                                 if(trashCode.length() == 0){
782                                         currSeq.setAligned(currSeq.getUnaligned());
783                                         currSeq.printSequence(trimFASTAFile);
784                                         
785                                         if(qFileName != ""){
786                                                 currQual.printQScores(trimQualFile);
787                                         }
788                                         
789                     
790                                         if(nameFile != ""){
791                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
792                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
793                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
794                                         }
795                     
796                     int numRedundants = 0;
797                     if (countfile != "") {
798                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
799                         if (itCount != nameCount.end()) { 
800                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
801                             numRedundants = itCount->second-1;
802                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
803                     }
804                                         
805                                         if (createGroup) {
806                                                 if(barcodes.size() != 0){
807                                                         string thisGroup = barcodeNameVector[barcodeIndex];
808                                                         if (primers.size() != 0) { 
809                                                                 if (primerNameVector[primerIndex] != "") { 
810                                                                         if(thisGroup != "") {
811                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
812                                                                         }else {
813                                                                                 thisGroup = primerNameVector[primerIndex]; 
814                                                                         }
815                                                                 } 
816                                                         }
817                                                         
818                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
819                             
820                                                         if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
821                             else {   groupMap[currSeq.getName()] = thisGroup; }
822                                                         
823                                                         if (nameFile != "") {
824                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
825                                                                 if (itName != nameMap.end()) { 
826                                                                         vector<string> thisSeqsNames; 
827                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
828                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
829                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
830                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
831                                                                         }
832                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
833                                                         }
834                                                         
835                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
836                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
837                                                         else { groupCounts[it->first] += (1 + numRedundants); }
838                                                                 
839                                                 }
840                                         }
841                                         
842                                         if(allFiles){
843                                                 ofstream output;
844                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
845                                                 currSeq.printSequence(output);
846                                                 output.close();
847                                                 
848                                                 if(qFileName != ""){
849                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
850                                                         currQual.printQScores(output);
851                                                         output.close();                                                 
852                                                 }
853                                                 
854                                                 if(nameFile != ""){
855                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
856                                                         if (itName != nameMap.end()) { 
857                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
858                                                                 output << itName->first << '\t' << itName->second << endl; 
859                                                                 output.close();
860                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
861                                                 }
862                                         }
863                                 }
864                                 else{
865                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
866                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
867                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
868                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
869                                         }
870                     if (countfile != "") {
871                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
872                         if (itCount != nameCount.end()) { 
873                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
874                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
875                     }
876                     
877                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
878                                         currSeq.setUnaligned(origSeq);
879                                         currSeq.setAligned(origSeq);
880                                         currSeq.printSequence(scrapFASTAFile);
881                                         if(qFileName != ""){
882                                                 currQual.printQScores(scrapQualFile);
883                                         }
884                                 }
885                                 count++;
886                         }
887                         
888                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
889                                 unsigned long long pos = inFASTA.tellg();
890                                 if ((pos == -1) || (pos >= line.end)) { break; }
891                         
892                         #else
893                                 if (inFASTA.eof()) { break; }
894                         #endif
895                         
896                         //report progress
897                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
898                         
899                 }
900                 //report progress
901                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
902                 
903                 
904                 inFASTA.close();
905                 trimFASTAFile.close();
906                 scrapFASTAFile.close();
907                 if (createGroup) {       outGroupsFile.close();   }
908                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
909                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
910         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
911                 
912                 return count;
913         }
914         catch(exception& e) {
915                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
916                 exit(1);
917         }
918 }
919
920 /**************************************************************************************************/
921
922 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
923         try {
924         
925         int process = 1;
926                 int exitCommand = 1;
927                 processIDS.clear();
928                 
929 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
930                                 //loop through and create all the processes you want
931                 while (process != processors) {
932                         int pid = fork();
933                         
934                         if (pid > 0) {
935                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
936                                 process++;
937                         }else if (pid == 0){
938                                 
939                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
940                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
941                                 vector<vector<string> > tempNameFileNames = nameFileNames;
942
943                                 if(allFiles){
944                                         ofstream temp;
945
946                                         for(int i=0;i<tempFASTAFileNames.size();i++){
947                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
948                                                         if (tempFASTAFileNames[i][j] != "") {
949                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
950                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
951
952                                                                 if(qFileName != ""){
953                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
954                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
955                                                                 }
956                                                                 if(nameFile != ""){
957                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
958                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
959                                                                 }
960                                                         }
961                                                 }
962                                         }
963                                 }
964                                                         
965                                 driverCreateTrim(filename,
966                                                                  qFileName,
967                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
968                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
969                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
970                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
971                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
972                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
973                                  (trimCountFileName + toString(getpid()) + ".temp"),
974                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
975                                                                  (groupFile + toString(getpid()) + ".temp"),
976                                                                  tempFASTAFileNames,
977                                                                  tempPrimerQualFileNames,
978                                                                  tempNameFileNames,
979                                                                  lines[process],
980                                                                  qLines[process]);
981                 
982                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
983                                 
984                                 //pass groupCounts to parent
985                                 if(createGroup){
986                                         ofstream out;
987                                         string tempFile = filename + toString(getpid()) + ".num.temp";
988                                         m->openOutputFile(tempFile, out);
989                                         
990                                         out << groupCounts.size() << endl;
991                                         
992                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
993                                                 out << it->first << '\t' << it->second << endl;
994                                         }
995                     
996                     out << groupMap.size() << endl;
997                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
998                                                 out << it->first << '\t' << it->second << endl;
999                                         }
1000                                         out.close();
1001                                 }
1002                                 exit(0);
1003                         }else { 
1004                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1005                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1006                                 exit(0);
1007                         }
1008                 }
1009                 
1010                 //parent do my part
1011                 ofstream temp;
1012                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1013                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1014                 if(qFileName != ""){
1015                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1016                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1017                 }
1018                 if (nameFile != "") {
1019                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1020                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1021                 }
1022         if (countfile != "") {
1023                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1024                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1025                 }
1026
1027                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1028                 
1029                 //force parent to wait until all the processes are done
1030                 for (int i=0;i<processIDS.size();i++) { 
1031                         int temp = processIDS[i];
1032                         wait(&temp);
1033                 }
1034 #else
1035         //////////////////////////////////////////////////////////////////////////////////////////////////////
1036                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1037                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1038                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1039                 
1040                 vector<trimData*> pDataArray; 
1041                 DWORD   dwThreadIdArray[processors-1];
1042                 HANDLE  hThreadArray[processors-1]; 
1043                 
1044                 //Create processor worker threads.
1045                 for( int h=0; h<processors-1; h++){
1046                         
1047             string extension = "";
1048                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1049             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1050             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1051             vector<vector<string> > tempNameFileNames = nameFileNames;
1052             
1053             if(allFiles){
1054                 ofstream temp;
1055                 
1056                 for(int i=0;i<tempFASTAFileNames.size();i++){
1057                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1058                         if (tempFASTAFileNames[i][j] != "") {
1059                             tempFASTAFileNames[i][j] += extension;
1060                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1061                             
1062                             if(qFileName != ""){
1063                                 tempPrimerQualFileNames[i][j] += extension;
1064                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1065                             }
1066                             if(nameFile != ""){
1067                                 tempNameFileNames[i][j] += extension;
1068                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1069                             }
1070                         }
1071                     }
1072                 }
1073             }
1074
1075             
1076                         trimData* tempTrim = new trimData(filename,
1077                                               qFileName, nameFile, countfile,
1078                                               (trimFASTAFileName+extension),
1079                                               (scrapFASTAFileName+extension),
1080                                               (trimQualFileName+extension),
1081                                               (scrapQualFileName+extension),
1082                                               (trimNameFileName+extension),
1083                                               (scrapNameFileName+extension),
1084                                               (trimCountFileName+extension),
1085                                               (scrapCountFileName+extension),
1086                                               (groupFile+extension),
1087                                               tempFASTAFileNames,
1088                                               tempPrimerQualFileNames,
1089                                               tempNameFileNames,
1090                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1091                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
1092                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1093                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1094                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
1095                         pDataArray.push_back(tempTrim);
1096             
1097                         hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
1098                 }
1099         
1100         //parent do my part
1101                 ofstream temp;
1102                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1103                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1104                 if(qFileName != ""){
1105                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1106                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1107                 }
1108                 if (nameFile != "") {
1109                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1110                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1111                 }
1112         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1113         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1114         vector<vector<string> > tempNameFileNames = nameFileNames;
1115         if(allFiles){
1116             ofstream temp;
1117             string extension = toString(processors-1) + ".temp";
1118             for(int i=0;i<tempFASTAFileNames.size();i++){
1119                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1120                     if (tempFASTAFileNames[i][j] != "") {
1121                         tempFASTAFileNames[i][j] += extension;
1122                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1123                         
1124                         if(qFileName != ""){
1125                             tempPrimerQualFileNames[i][j] += extension;
1126                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
1127                         }
1128                         if(nameFile != ""){
1129                             tempNameFileNames[i][j] += extension;
1130                             m->openOutputFile(tempNameFileNames[i][j], temp);           temp.close();
1131                         }
1132                     }
1133                 }
1134             }
1135         }
1136         
1137                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1138         processIDS.push_back(processors-1);
1139
1140         
1141                 //Wait until all threads have terminated.
1142                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1143                 
1144                 //Close all thread handles and free memory allocations.
1145                 for(int i=0; i < pDataArray.size(); i++){
1146                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1147                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1148                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1149                 else { groupCounts[it->first] += it->second; }
1150             }
1151             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1152                 map<string, string>::iterator it2 = groupMap.find(it->first);
1153                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1154                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1155             }
1156             CloseHandle(hThreadArray[i]);
1157                         delete pDataArray[i];
1158                 }
1159         
1160 #endif          
1161         
1162         
1163         //append files
1164                 for(int i=0;i<processIDS.size();i++){
1165                         
1166                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1167                         
1168                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1169                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1170                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1171                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1172                         
1173                         if(qFileName != ""){
1174                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1175                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1176                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1177                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1178                         }
1179                         
1180                         if(nameFile != ""){
1181                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1182                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1183                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1184                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1185                         }
1186             
1187             if(countfile != ""){
1188                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1189                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1190                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1191                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1192                         }
1193                         
1194                         if((createGroup)&&(countfile == "")){
1195                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1196                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1197                         }
1198                         
1199                         
1200                         if(allFiles){
1201                                 for(int j=0;j<fastaFileNames.size();j++){
1202                                         for(int k=0;k<fastaFileNames[j].size();k++){
1203                                                 if (fastaFileNames[j][k] != "") {
1204                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1205                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1206                                                         
1207                                                         if(qFileName != ""){
1208                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1209                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1210                                                         }
1211                                                         
1212                                                         if(nameFile != ""){
1213                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1214                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1215                                                         }
1216                                                 }
1217                                         }
1218                                 }
1219                         }
1220                         
1221             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1222                         if(createGroup){
1223                                 ifstream in;
1224                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1225                                 m->openInputFile(tempFile, in);
1226                                 int tempNum;
1227                                 string group;
1228                                 
1229                                 in >> tempNum; m->gobble(in);
1230                                 
1231                                 if (tempNum != 0) {
1232                                         for (int i = 0; i < tempNum; i++) { 
1233                         int groupNum;
1234                                                 in >> group >> groupNum; m->gobble(in);
1235                         
1236                                                 map<string, int>::iterator it = groupCounts.find(group);
1237                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1238                                                 else { groupCounts[it->first] += groupNum; }
1239                                         }
1240                                 }
1241                 in >> tempNum; m->gobble(in);
1242                 if (tempNum != 0) {
1243                                         for (int i = 0; i < tempNum; i++) { 
1244                         string group, seqName;
1245                                                 in >> seqName >> group; m->gobble(in);
1246                         
1247                                                 map<string, string>::iterator it = groupMap.find(seqName);
1248                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1249                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1250                                         }
1251                                 }
1252                 
1253                                 in.close(); m->mothurRemove(tempFile);
1254                         }
1255             #endif
1256                 }
1257
1258         return exitCommand;
1259         }
1260         catch(exception& e) {
1261                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1262                 exit(1);
1263         }
1264 }
1265
1266 /**************************************************************************************************/
1267
1268 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1269         try {
1270         
1271         vector<unsigned long long> fastaFilePos;
1272                 vector<unsigned long long> qfileFilePos;
1273                 
1274                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1275                 //set file positions for fasta file
1276                 fastaFilePos = m->divideFile(filename, processors);
1277                 
1278                 //get name of first sequence in each chunk
1279                 map<string, int> firstSeqNames;
1280                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1281                         ifstream in;
1282                         m->openInputFile(filename, in);
1283                         in.seekg(fastaFilePos[i]);
1284                 
1285                         Sequence temp(in); 
1286                         firstSeqNames[temp.getName()] = i;
1287                 
1288                         in.close();
1289                 }
1290                 
1291                 if(qfilename != "")     {
1292             //seach for filePos of each first name in the qfile and save in qfileFilePos
1293             ifstream inQual;
1294             m->openInputFile(qfilename, inQual);
1295             
1296             string input;
1297             while(!inQual.eof()){       
1298                 input = m->getline(inQual);
1299                 
1300                 if (input.length() != 0) {
1301                     if(input[0] == '>'){ //this is a sequence name line
1302                         istringstream nameStream(input);
1303                         
1304                         string sname = "";  nameStream >> sname;
1305                         sname = sname.substr(1);
1306                         
1307                         map<string, int>::iterator it = firstSeqNames.find(sname);
1308                         
1309                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1310                             unsigned long long pos = inQual.tellg(); 
1311                             qfileFilePos.push_back(pos - input.length() - 1);   
1312                             firstSeqNames.erase(it);
1313                         }
1314                     }
1315                 }
1316                 
1317                 if (firstSeqNames.size() == 0) { break; }
1318             }
1319             inQual.close();
1320             
1321             
1322             if (firstSeqNames.size() != 0) { 
1323                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1324                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1325                 }
1326                 qFileName = "";
1327                 return processors;
1328             }
1329             
1330             //get last file position of qfile
1331             FILE * pFile;
1332             unsigned long long size;
1333             
1334             //get num bytes in file
1335             pFile = fopen (qfilename.c_str(),"rb");
1336             if (pFile==NULL) perror ("Error opening file");
1337             else{
1338                 fseek (pFile, 0, SEEK_END);
1339                 size=ftell (pFile);
1340                 fclose (pFile);
1341             }
1342             
1343             qfileFilePos.push_back(size);
1344         }
1345         
1346         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1347             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1348                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1349                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1350                 }       
1351                 if(qfilename == "")     {       qLines = lines; } //files with duds
1352                 
1353                 return processors;
1354                 
1355                 #else
1356             
1357         if (processors == 1) { //save time
1358                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1359                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1360             lines.push_back(linePair(0, 1000));
1361             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1362         }else{
1363             int numFastaSeqs = 0;
1364             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1365             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1366         
1367             if (qfilename != "") { 
1368                 int numQualSeqs = 0;
1369                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1370                 
1371                 if (numFastaSeqs != numQualSeqs) {
1372                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1373                 }
1374             }
1375         
1376             //figure out how many sequences you have to process
1377             int numSeqsPerProcessor = numFastaSeqs / processors;
1378             for (int i = 0; i < processors; i++) {
1379                 int startIndex =  i * numSeqsPerProcessor;
1380                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1381                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1382                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1383             }
1384         }
1385             if(qfilename == "") {       qLines = lines; } //files with duds
1386                         return 1;
1387                 
1388                 #endif
1389         }
1390         catch(exception& e) {
1391                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1392                 exit(1);
1393         }
1394 }
1395
1396 //***************************************************************************************************************
1397
1398 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1399         try {
1400                 ifstream inOligos;
1401                 m->openInputFile(oligoFile, inOligos);
1402                 
1403                 ofstream test;
1404                 
1405                 string type, oligo, group;
1406
1407                 int indexPrimer = 0;
1408                 int indexBarcode = 0;
1409                 
1410                 while(!inOligos.eof()){
1411
1412                         inOligos >> type; 
1413             
1414                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1415             
1416                         if(type[0] == '#'){
1417                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1418                                 m->gobble(inOligos);
1419                         }
1420                         else{
1421                                 m->gobble(inOligos);
1422                                 //make type case insensitive
1423                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1424                                 
1425                                 inOligos >> oligo;
1426                 
1427                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1428                                 
1429                                 for(int i=0;i<oligo.length();i++){
1430                                         oligo[i] = toupper(oligo[i]);
1431                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1432                                 }
1433                                 
1434                                 if(type == "FORWARD"){
1435                                         group = "";
1436                                         
1437                                         // get rest of line in case there is a primer name
1438                                         while (!inOligos.eof()) {       
1439                                                 char c = inOligos.get(); 
1440                                                 if (c == 10 || c == 13){        break;  }
1441                                                 else if (c == 32 || c == 9){;} //space or tab
1442                                                 else {  group += c;  }
1443                                         } 
1444                                         
1445                                         //check for repeat barcodes
1446                                         map<string, int>::iterator itPrime = primers.find(oligo);
1447                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1448                                         
1449                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1450                     
1451                                         primers[oligo]=indexPrimer; indexPrimer++;              
1452                                         primerNameVector.push_back(group);
1453                                 }
1454                                 else if(type == "REVERSE"){
1455                                         //Sequence oligoRC("reverse", oligo);
1456                                         //oligoRC.reverseComplement();
1457                     string oligoRC = reverseOligo(oligo);
1458                                         revPrimer.push_back(oligoRC);
1459                                 }
1460                                 else if(type == "BARCODE"){
1461                                         inOligos >> group;
1462                                         
1463                                         //check for repeat barcodes
1464                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1465                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1466                     
1467                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1468                                         barcodeNameVector.push_back(group);
1469                                 }else if(type == "LINKER"){
1470                                         linker.push_back(oligo);
1471                                 }else if(type == "SPACER"){
1472                                         spacer.push_back(oligo);
1473                                 }
1474                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1475                         }
1476                         m->gobble(inOligos);
1477                 }       
1478                 inOligos.close();
1479                 
1480                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1481                 
1482                 //add in potential combos
1483                 if(barcodeNameVector.size() == 0){
1484                         barcodes[""] = 0;
1485                         barcodeNameVector.push_back("");                        
1486                 }
1487                 
1488                 if(primerNameVector.size() == 0){
1489                         primers[""] = 0;
1490                         primerNameVector.push_back("");                 
1491                 }
1492                 
1493                 fastaFileNames.resize(barcodeNameVector.size());
1494                 for(int i=0;i<fastaFileNames.size();i++){
1495                         fastaFileNames[i].assign(primerNameVector.size(), "");
1496                 }
1497                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1498                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1499                 
1500                 if(allFiles){
1501                         set<string> uniqueNames; //used to cleanup outputFileNames
1502                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1503                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1504                                         
1505                                         string primerName = primerNameVector[itPrimer->second];
1506                                         string barcodeName = barcodeNameVector[itBar->second];
1507                                         
1508                                         string comboGroupName = "";
1509                                         string fastaFileName = "";
1510                                         string qualFileName = "";
1511                                         string nameFileName = "";
1512                     string countFileName = "";
1513                                         
1514                                         if(primerName == ""){
1515                                                 comboGroupName = barcodeNameVector[itBar->second];
1516                                         }
1517                                         else{
1518                                                 if(barcodeName == ""){
1519                                                         comboGroupName = primerNameVector[itPrimer->second];
1520                                                 }
1521                                                 else{
1522                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1523                                                 }
1524                                         }
1525                                         
1526                                         
1527                                         ofstream temp;
1528                     map<string, string> variables; 
1529                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1530                     variables["[tag]"] = comboGroupName;
1531                                         fastaFileName = getOutputFileName("fasta", variables);
1532                                         if (uniqueNames.count(fastaFileName) == 0) {
1533                                                 outputNames.push_back(fastaFileName);
1534                                                 outputTypes["fasta"].push_back(fastaFileName);
1535                                                 uniqueNames.insert(fastaFileName);
1536                                         }
1537                                         
1538                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1539                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1540                                         
1541                                         if(qFileName != ""){
1542                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1543                                                 qualFileName = getOutputFileName("qfile", variables);
1544                                                 if (uniqueNames.count(qualFileName) == 0) {
1545                                                         outputNames.push_back(qualFileName);
1546                                                         outputTypes["qfile"].push_back(qualFileName);
1547                                                 }
1548                                                 
1549                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1550                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1551                                         }
1552                                         
1553                                         if(nameFile != ""){
1554                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1555                                                 nameFileName = getOutputFileName("name", variables);
1556                                                 if (uniqueNames.count(nameFileName) == 0) {
1557                                                         outputNames.push_back(nameFileName);
1558                                                         outputTypes["name"].push_back(nameFileName);
1559                                                 }
1560                                                 
1561                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1562                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1563                                         }
1564                                 }
1565                         }
1566                 }
1567                 numFPrimers = primers.size();
1568                 numRPrimers = revPrimer.size();
1569         numLinkers = linker.size();
1570         numSpacers = spacer.size();
1571                 
1572                 bool allBlank = true;
1573                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1574                         if (barcodeNameVector[i] != "") {
1575                                 allBlank = false;
1576                                 break;
1577                         }
1578                 }
1579                 for (int i = 0; i < primerNameVector.size(); i++) {
1580                         if (primerNameVector[i] != "") {
1581                                 allBlank = false;
1582                                 break;
1583                         }
1584                 }
1585
1586                 if (allBlank) {
1587                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1588                         allFiles = false;
1589                         return false;
1590                 }
1591                 
1592                 return true;
1593                 
1594         }
1595         catch(exception& e) {
1596                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1597                 exit(1);
1598         }
1599 }
1600 //***************************************************************************************************************
1601
1602 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1603         try {
1604                 bool success = 1;
1605                 if(qscores.getName() != ""){
1606                         qscores.trimQScores(-1, keepFirst);
1607                 }
1608                 sequence.trim(keepFirst);
1609                 return success;
1610         }
1611         catch(exception& e) {
1612                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1613                 exit(1);
1614         }
1615         
1616 }       
1617
1618 //***************************************************************************************************************
1619
1620 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1621         try {
1622                 bool success = 0;
1623                 
1624                 int length = sequence.getNumBases() - removeLast;
1625                 
1626                 if(length > 0){
1627                         if(qscores.getName() != ""){
1628                                 qscores.trimQScores(-1, length);
1629                         }
1630                         sequence.trim(length);
1631                         success = 1;
1632                 }
1633                 else{
1634                         success = 0;
1635                 }
1636
1637                 return success;
1638         }
1639         catch(exception& e) {
1640                 m->errorOut(e, "removeLastTrim", "countDiffs");
1641                 exit(1);
1642         }
1643         
1644 }       
1645
1646 //***************************************************************************************************************
1647
1648 bool TrimSeqsCommand::cullLength(Sequence& seq){
1649         try {
1650         
1651                 int length = seq.getNumBases();
1652                 bool success = 0;       //guilty until proven innocent
1653                 
1654                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1655                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1656                 else                                                                                            {       success = 0;    }
1657                 
1658                 return success;
1659         
1660         }
1661         catch(exception& e) {
1662                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1663                 exit(1);
1664         }
1665         
1666 }
1667
1668 //***************************************************************************************************************
1669
1670 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1671         try {
1672                 int longHomoP = seq.getLongHomoPolymer();
1673                 bool success = 0;       //guilty until proven innocent
1674                 
1675                 if(longHomoP <= maxHomoP){      success = 1;    }
1676                 else                                    {       success = 0;    }
1677                 
1678                 return success;
1679         }
1680         catch(exception& e) {
1681                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1682                 exit(1);
1683         }
1684         
1685 }
1686 //********************************************************************/
1687 string TrimSeqsCommand::reverseOligo(string oligo){
1688         try {
1689         string reverse = "";
1690         
1691         for(int i=oligo.length()-1;i>=0;i--){
1692             
1693             if(oligo[i] == 'A')         {       reverse += 'T'; }
1694             else if(oligo[i] == 'T'){   reverse += 'A'; }
1695             else if(oligo[i] == 'U'){   reverse += 'A'; }
1696             
1697             else if(oligo[i] == 'G'){   reverse += 'C'; }
1698             else if(oligo[i] == 'C'){   reverse += 'G'; }
1699             
1700             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1701             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1702             
1703             else if(oligo[i] == 'M'){   reverse += 'K'; }
1704             else if(oligo[i] == 'K'){   reverse += 'M'; }
1705             
1706             else if(oligo[i] == 'W'){   reverse += 'W'; }
1707             else if(oligo[i] == 'S'){   reverse += 'S'; }
1708             
1709             else if(oligo[i] == 'B'){   reverse += 'V'; }
1710             else if(oligo[i] == 'V'){   reverse += 'B'; }
1711             
1712             else if(oligo[i] == 'D'){   reverse += 'H'; }
1713             else if(oligo[i] == 'H'){   reverse += 'D'; }
1714             
1715             else                                                {       reverse += 'N'; }
1716         }
1717         
1718         
1719         return reverse;
1720     }
1721         catch(exception& e) {
1722                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1723                 exit(1);
1724         }
1725 }
1726
1727 //***************************************************************************************************************
1728
1729 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1730         try {
1731                 int numNs = seq.getAmbigBases();
1732                 bool success = 0;       //guilty until proven innocent
1733                 
1734                 if(numNs <= maxAmbig)   {       success = 1;    }
1735                 else                                    {       success = 0;    }
1736                 
1737                 return success;
1738         }
1739         catch(exception& e) {
1740                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1741                 exit(1);
1742         }
1743         
1744 }
1745 //***************************************************************************************************************