]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
metastats in progress
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
29                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
30                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
31                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
32                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
33                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
34                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
35                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
36                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
37                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
38                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
39                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                         
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string TrimSeqsCommand::getHelpString(){        
54         try {
55                 string helpString = "";
56                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
57                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
58                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
59                 helpString += "The fasta parameter is required.\n";
60                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
61                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
62                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
63                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
64                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
65                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
66                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
67                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70                 helpString += "The qfile parameter allows you to provide a quality file.\n";
71                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
72                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
73                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
74                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
75                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
76                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
77                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
78                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
79                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
80                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
81                 helpString += "The trim.seqs command should be in the following format: \n";
82                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
83                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
84                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
85                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
86                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87                 return helpString;
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
91                 exit(1);
92         }
93 }
94
95
96 //**********************************************************************************************************************
97
98 TrimSeqsCommand::TrimSeqsCommand(){     
99         try {
100                 abort = true; calledHelp = true; 
101                 setParameters();
102                 vector<string> tempOutNames;
103                 outputTypes["fasta"] = tempOutNames;
104                 outputTypes["qfile"] = tempOutNames;
105                 outputTypes["group"] = tempOutNames;
106                 outputTypes["name"] = tempOutNames;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
110                 exit(1);
111         }
112 }
113 //***************************************************************************************************************
114
115 TrimSeqsCommand::TrimSeqsCommand(string option)  {
116         try {
117                 
118                 abort = false; calledHelp = false;   
119                 comboStarts = 0;
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter;
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         //initialize outputTypes
140                         vector<string> tempOutNames;
141                         outputTypes["fasta"] = tempOutNames;
142                         outputTypes["qfile"] = tempOutNames;
143                         outputTypes["group"] = tempOutNames;
144                         outputTypes["name"] = tempOutNames;
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
148                         if (inputDir == "not found"){   inputDir = "";          }
149                         else {
150                                 string path;
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("oligos");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
165                                 }
166                                 
167                                 it = parameters.find("qfile");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
173                                 }
174                                 
175                                 it = parameters.find("name");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
181                                 }
182                                 
183                         }
184
185                         
186                         //check for required parameters
187                         fastaFile = validParameter.validFile(parameters, "fasta", true);
188                         if (fastaFile == "not found") {                                 
189                                 fastaFile = m->getFastaFile(); 
190                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
191                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
192                         }else if (fastaFile == "not open") { abort = true; }    
193                         else { m->setFastaFile(fastaFile); }
194                         
195                         //if the user changes the output directory command factory will send this info to us in the output parameter 
196                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
197                                 outputDir = ""; 
198                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
199                         }
200                 
201                         
202                         //check for optional parameter and set defaults
203                         // ...at some point should added some additional type checking...
204                         string temp;
205                         temp = validParameter.validFile(parameters, "flip", false);
206                         if (temp == "not found")    {   flip = 0;       }
207                         else {  flip = m->isTrue(temp);         }
208                 
209                         temp = validParameter.validFile(parameters, "oligos", true);
210                         if (temp == "not found"){       oligoFile = "";         }
211                         else if(temp == "not open"){    abort = true;   } 
212                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
213                         
214                         
215                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
216                         convert(temp, maxAmbig);  
217
218                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
219                         convert(temp, maxHomoP);  
220
221                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
222                         convert(temp, minLength); 
223                         
224                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
225                         convert(temp, maxLength);
226                         
227                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
228                         convert(temp, bdiffs);
229                         
230                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
231                         convert(temp, pdiffs);
232                         
233                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
234                         convert(temp, tdiffs);
235                         
236                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
237                         
238                         temp = validParameter.validFile(parameters, "qfile", true);     
239                         if (temp == "not found")        {       qFileName = "";         }
240                         else if(temp == "not open")     {       abort = true;           }
241                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
242                         
243                         temp = validParameter.validFile(parameters, "name", true);      
244                         if (temp == "not found")        {       nameFile = "";          }
245                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
246                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
247                         
248                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
249                         convert(temp, qThreshold);
250                         
251                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
252                         qtrim = m->isTrue(temp);
253
254                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
255                         convert(temp, qRollAverage);
256
257                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
258                         convert(temp, qWindowAverage);
259
260                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
261                         convert(temp, qWindowSize);
262
263                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
264                         convert(temp, qWindowStep);
265
266                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
267                         convert(temp, qAverage);
268
269                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
270                         convert(temp, keepFirst);
271
272                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
273                         convert(temp, removeLast);
274                         
275                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
276                         allFiles = m->isTrue(temp);
277                         
278                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
279                         m->setProcessors(temp);
280                         convert(temp, processors); 
281                         
282                         
283                         if(allFiles && (oligoFile == "")){
284                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
285                         }
286                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
287                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
288                                 qAverage=0;
289                                 qThreshold=0;
290                         }
291                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
292                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
293                                 abort = true;
294                         }
295                 }
296
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
300                 exit(1);
301         }
302 }
303 //***************************************************************************************************************
304
305 int TrimSeqsCommand::execute(){
306         try{
307         
308                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
309                 
310                 numFPrimers = 0;  //this needs to be initialized
311                 numRPrimers = 0;
312                 createGroup = false;
313                 vector<vector<string> > fastaFileNames;
314                 vector<vector<string> > qualFileNames;
315                 vector<vector<string> > nameFileNames;
316                 
317                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
318                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
319                 
320                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
321                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
322                 
323                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
324                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
325                 
326                 if (qFileName != "") {
327                         outputNames.push_back(trimQualFile);
328                         outputNames.push_back(scrapQualFile);
329                         outputTypes["qfile"].push_back(trimQualFile);
330                         outputTypes["qfile"].push_back(scrapQualFile); 
331                 }
332                 
333                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
334                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
335                 
336                 if (nameFile != "") {
337                         m->readNames(nameFile, nameMap);
338                         outputNames.push_back(trimNameFile);
339                         outputNames.push_back(scrapNameFile);
340                         outputTypes["name"].push_back(trimNameFile);
341                         outputTypes["name"].push_back(scrapNameFile); 
342                 }
343                 
344                 if (m->control_pressed) { return 0; }
345                 
346                 string outputGroupFileName;
347                 if(oligoFile != ""){
348                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
349                         if (createGroup) {
350                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
351                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
352                         }
353                 }
354                 
355                 vector<unsigned long long> fastaFilePos;
356                 vector<unsigned long long> qFilePos;
357                 
358                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
359                 
360                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
361                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
362                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
363                 }       
364                 if(qFileName == "")     {       qLines = lines; } //files with duds
365                 
366                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
367                                 if(processors == 1){
368                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
369                                 }else{
370                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
371                                 }       
372                 #else
373                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
374                 #endif
375                 
376                 if (m->control_pressed) {  return 0; }                  
377         
378                 if(allFiles){
379                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
380                         map<string, string>::iterator it;
381                         set<string> namesToRemove;
382                         for(int i=0;i<fastaFileNames.size();i++){
383                                 for(int j=0;j<fastaFileNames[0].size();j++){
384                                         if (fastaFileNames[i][j] != "") {
385                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
386                                                         if(m->isBlank(fastaFileNames[i][j])){
387                                                                 m->mothurRemove(fastaFileNames[i][j]);
388                                                                 namesToRemove.insert(fastaFileNames[i][j]);
389                                                         
390                                                                 if(qFileName != ""){
391                                                                         m->mothurRemove(qualFileNames[i][j]);
392                                                                         namesToRemove.insert(qualFileNames[i][j]);
393                                                                 }
394                                                                 
395                                                                 if(nameFile != ""){
396                                                                         m->mothurRemove(nameFileNames[i][j]);
397                                                                         namesToRemove.insert(nameFileNames[i][j]);
398                                                                 }
399                                                         }else{  
400                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
401                                                                 if (it == uniqueFastaNames.end()) {     
402                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
403                                                                 }       
404                                                         }
405                                                 }
406                                         }
407                                 }
408                         }
409                         
410                         //remove names for outputFileNames, just cleans up the output
411                         vector<string> outputNames2;
412                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
413                         outputNames = outputNames2;
414                         
415                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
416                                 ifstream in;
417                                 m->openInputFile(it->first, in);
418                                 
419                                 ofstream out;
420                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
421                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
422                                 m->openOutputFile(thisGroupName, out);
423                                 
424                                 while (!in.eof()){
425                                         if (m->control_pressed) { break; }
426                                         
427                                         Sequence currSeq(in); m->gobble(in);
428                                         out << currSeq.getName() << '\t' << it->second << endl;
429                                 }
430                                 in.close();
431                                 out.close();
432                         }
433                 }
434                 
435                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
436
437                 //output group counts
438                 m->mothurOutEndLine();
439                 int total = 0;
440                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
441                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
442                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
443                 }
444                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
445                 
446                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
447
448                 //set fasta file as new current fastafile
449                 string current = "";
450                 itTypes = outputTypes.find("fasta");
451                 if (itTypes != outputTypes.end()) {
452                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
453                 }
454                 
455                 itTypes = outputTypes.find("name");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
458                 }
459                 
460                 itTypes = outputTypes.find("qfile");
461                 if (itTypes != outputTypes.end()) {
462                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
463                 }
464                 
465                 itTypes = outputTypes.find("group");
466                 if (itTypes != outputTypes.end()) {
467                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
468                 }
469
470                 m->mothurOutEndLine();
471                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
472                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
473                 m->mothurOutEndLine();
474                 
475                 return 0;       
476                         
477         }
478         catch(exception& e) {
479                 m->errorOut(e, "TrimSeqsCommand", "execute");
480                 exit(1);
481         }
482 }
483                 
484 /**************************************************************************************/
485
486 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
487                 
488         try {
489                 
490                 ofstream trimFASTAFile;
491                 m->openOutputFile(trimFileName, trimFASTAFile);
492                 
493                 ofstream scrapFASTAFile;
494                 m->openOutputFile(scrapFileName, scrapFASTAFile);
495                 
496                 ofstream trimQualFile;
497                 ofstream scrapQualFile;
498                 if(qFileName != ""){
499                         m->openOutputFile(trimQFileName, trimQualFile);
500                         m->openOutputFile(scrapQFileName, scrapQualFile);
501                 }
502                 
503                 ofstream trimNameFile;
504                 ofstream scrapNameFile;
505                 if(nameFile != ""){
506                         m->openOutputFile(trimNFileName, trimNameFile);
507                         m->openOutputFile(scrapNFileName, scrapNameFile);
508                 }
509                 
510                 
511                 ofstream outGroupsFile;
512                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
513                 if(allFiles){
514                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
515                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
516                                         if (fastaFileNames[i][j] != "") {
517                                                 ofstream temp;
518                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
519                                                 if(qFileName != ""){
520                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
521                                                 }
522                                                 
523                                                 if(nameFile != ""){
524                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
525                                                 }
526                                         }
527                                 }
528                         }
529                 }
530                 
531                 ifstream inFASTA;
532                 m->openInputFile(filename, inFASTA);
533                 inFASTA.seekg(line->start);
534                 
535                 ifstream qFile;
536                 if(qFileName != "")     {
537                         m->openInputFile(qFileName, qFile);
538                         qFile.seekg(qline->start);  
539                 }
540                 
541                 int count = 0;
542                 bool moreSeqs = 1;
543                 TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
544         
545                 while (moreSeqs) {
546                                 
547                         if (m->control_pressed) { 
548                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
549                                 if (createGroup) {       outGroupsFile.close();   }
550
551                                 if(qFileName != ""){
552                                         qFile.close();
553                                 }
554                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
555
556                                 return 0;
557                         }
558                         
559                         int success = 1;
560                         string trashCode = "";
561                         int currentSeqsDiffs = 0;
562
563                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
564                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
565                         QualityScores currQual;
566                         if(qFileName != ""){
567                                 currQual = QualityScores(qFile);  m->gobble(qFile);
568                         }
569                         
570                         string origSeq = currSeq.getUnaligned();
571                         if (origSeq != "") {
572                                 
573                                 int barcodeIndex = 0;
574                                 int primerIndex = 0;
575                                 
576                                 if(barcodes.size() != 0){
577                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
578                                         if(success > bdiffs)            {       trashCode += 'b';       }
579                                         else{ currentSeqsDiffs += success;  }
580                                 }
581                                 
582                                 if(numFPrimers != 0){
583                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex);
584                                         if(success > pdiffs)            {       trashCode += 'f';       }
585                                         else{ currentSeqsDiffs += success;  }
586                                 }
587                                 
588                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
589                                 
590                                 if(numRPrimers != 0){
591                                         success = trimOligos.stripReverse(currSeq, currQual);
592                                         if(!success)                            {       trashCode += 'r';       }
593                                 }
594
595                                 if(keepFirst != 0){
596                                         success = keepFirstTrim(currSeq, currQual);
597                                 }
598                                 
599                                 if(removeLast != 0){
600                                         success = removeLastTrim(currSeq, currQual);
601                                         if(!success)                            {       trashCode += 'l';       }
602                                 }
603
604                                 
605                                 if(qFileName != ""){
606                                         int origLength = currSeq.getNumBases();
607                                         
608                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
609                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
610                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
611                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
612                                         else                                            {       success = 1;                            }
613                                         
614                                         //you don't want to trim, if it fails above then scrap it
615                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
616                                         
617                                         if(!success)                            {       trashCode += 'q';       }
618                                 }                               
619                 
620                                 if(minLength > 0 || maxLength > 0){
621                                         success = cullLength(currSeq);
622                                         if(!success)                            {       trashCode += 'l';       }
623                                 }
624                                 if(maxHomoP > 0){
625                                         success = cullHomoP(currSeq);
626                                         if(!success)                            {       trashCode += 'h';       }
627                                 }
628                                 if(maxAmbig != -1){
629                                         success = cullAmbigs(currSeq);
630                                         if(!success)                            {       trashCode += 'n';       }
631                                 }
632                                 
633                                 if(flip){               // should go last                       
634                                         currSeq.reverseComplement();
635                                         if(qFileName != ""){
636                                                 currQual.flipQScores(); 
637                                         }
638                                 }
639                                 
640                                 if(trashCode.length() == 0){
641                                         currSeq.setAligned(currSeq.getUnaligned());
642                                         currSeq.printSequence(trimFASTAFile);
643                                         
644                                         if(qFileName != ""){
645                                                 currQual.printQScores(trimQualFile);
646                                         }
647                                         
648                                         if(nameFile != ""){
649                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
650                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
651                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
652                                         }
653                                         
654                                         if (createGroup) {
655                                                 if(barcodes.size() != 0){
656                                                         string thisGroup = barcodeNameVector[barcodeIndex];
657                                                         if (primers.size() != 0) { 
658                                                                 if (primerNameVector[primerIndex] != "") { 
659                                                                         if(thisGroup != "") {
660                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
661                                                                         }else {
662                                                                                 thisGroup = primerNameVector[primerIndex]; 
663                                                                         }
664                                                                 } 
665                                                         }
666                                                         
667                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
668                                                         
669                                                         if (nameFile != "") {
670                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
671                                                                 if (itName != nameMap.end()) { 
672                                                                         vector<string> thisSeqsNames; 
673                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
674                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
675                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
676                                                                         }
677                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
678                                                         }
679                                                         
680                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
681                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
682                                                         else { groupCounts[it->first]++; }
683                                                                 
684                                                 }
685                                         }
686                                         
687                                         if(allFiles){
688                                                 ofstream output;
689                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
690                                                 currSeq.printSequence(output);
691                                                 output.close();
692                                                 
693                                                 if(qFileName != ""){
694                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
695                                                         currQual.printQScores(output);
696                                                         output.close();                                                 
697                                                 }
698                                                 
699                                                 if(nameFile != ""){
700                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
701                                                         if (itName != nameMap.end()) { 
702                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
703                                                                 output << itName->first << '\t' << itName->second << endl; 
704                                                                 output.close();
705                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
706                                                 }
707                                         }
708                                 }
709                                 else{
710                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
711                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
712                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
713                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
714                                         }
715                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
716                                         currSeq.setUnaligned(origSeq);
717                                         currSeq.setAligned(origSeq);
718                                         currSeq.printSequence(scrapFASTAFile);
719                                         if(qFileName != ""){
720                                                 currQual.printQScores(scrapQualFile);
721                                         }
722                                 }
723                                 count++;
724                         }
725                         
726                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
727                                 unsigned long long pos = inFASTA.tellg();
728                                 if ((pos == -1) || (pos >= line->end)) { break; }
729                         
730                         #else
731                                 if (inFASTA.eof()) { break; }
732                         #endif
733                         
734                         //report progress
735                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
736                         
737                 }
738                 //report progress
739                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
740                 
741                 
742                 inFASTA.close();
743                 trimFASTAFile.close();
744                 scrapFASTAFile.close();
745                 if (createGroup) {       outGroupsFile.close();   }
746                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
747                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
748                 
749                 return count;
750         }
751         catch(exception& e) {
752                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
753                 exit(1);
754         }
755 }
756
757 /**************************************************************************************************/
758
759 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
760         try {
761 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
762                 int process = 1;
763                 int exitCommand = 1;
764                 processIDS.clear();
765                 
766                 //loop through and create all the processes you want
767                 while (process != processors) {
768                         int pid = fork();
769                         
770                         if (pid > 0) {
771                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
772                                 process++;
773                         }else if (pid == 0){
774                                 
775                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
776                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
777                                 vector<vector<string> > tempNameFileNames = nameFileNames;
778
779                                 if(allFiles){
780                                         ofstream temp;
781
782                                         for(int i=0;i<tempFASTAFileNames.size();i++){
783                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
784                                                         if (tempFASTAFileNames[i][j] != "") {
785                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
786                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
787
788                                                                 if(qFileName != ""){
789                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
790                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
791                                                                 }
792                                                                 if(nameFile != ""){
793                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
794                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
795                                                                 }
796                                                         }
797                                                 }
798                                         }
799                                 }
800                                                         
801                                 driverCreateTrim(filename,
802                                                                  qFileName,
803                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
804                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
805                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
806                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
807                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
808                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
809                                                                  (groupFile + toString(getpid()) + ".temp"),
810                                                                  tempFASTAFileNames,
811                                                                  tempPrimerQualFileNames,
812                                                                  tempNameFileNames,
813                                                                  lines[process],
814                                                                  qLines[process]);
815                                 
816                                 //pass groupCounts to parent
817                                 if(createGroup){
818                                         ofstream out;
819                                         string tempFile = filename + toString(getpid()) + ".num.temp";
820                                         m->openOutputFile(tempFile, out);
821                                         
822                                         out << groupCounts.size() << endl;
823                                         
824                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
825                                                 out << it->first << '\t' << it->second << endl;
826                                         }
827                                         out.close();
828                                 }
829                                 exit(0);
830                         }else { 
831                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
832                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
833                                 exit(0);
834                         }
835                 }
836                 
837                 //parent do my part
838                 ofstream temp;
839                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
840                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
841                 if(qFileName != ""){
842                         m->openOutputFile(trimQualFileName, temp);              temp.close();
843                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
844                 }
845                 if (nameFile != "") {
846                         m->openOutputFile(trimNameFileName, temp);              temp.close();
847                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
848                 }
849
850                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
851                 
852                 //force parent to wait until all the processes are done
853                 for (int i=0;i<processIDS.size();i++) { 
854                         int temp = processIDS[i];
855                         wait(&temp);
856                 }
857                 
858                 //append files
859                 for(int i=0;i<processIDS.size();i++){
860                         
861                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
862                         
863                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
864                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
865                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
866                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
867                         
868                         if(qFileName != ""){
869                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
870                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
871                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
872                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
873                         }
874                         
875                         if(nameFile != ""){
876                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
877                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
878                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
879                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
880                         }
881                         
882                         if(createGroup){
883                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
884                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
885                         }
886                         
887                         
888                         if(allFiles){
889                                 for(int j=0;j<fastaFileNames.size();j++){
890                                         for(int k=0;k<fastaFileNames[j].size();k++){
891                                                 if (fastaFileNames[j][k] != "") {
892                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
893                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
894                                                         
895                                                         if(qFileName != ""){
896                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
897                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
898                                                         }
899                                                         
900                                                         if(nameFile != ""){
901                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
902                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
903                                                         }
904                                                 }
905                                         }
906                                 }
907                         }
908                         
909                         if(createGroup){
910                                 ifstream in;
911                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
912                                 m->openInputFile(tempFile, in);
913                                 int tempNum;
914                                 string group;
915                                 
916                                 in >> tempNum; m->gobble(in);
917                                 
918                                 if (tempNum != 0) {
919                                         while (!in.eof()) { 
920                                                 in >> group >> tempNum; m->gobble(in);
921                                 
922                                                 map<string, int>::iterator it = groupCounts.find(group);
923                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
924                                                 else { groupCounts[it->first] += tempNum; }
925                                         }
926                                 }
927                                 in.close(); m->mothurRemove(tempFile);
928                         }
929                         
930                 }
931         
932                 return exitCommand;
933 #endif          
934         }
935         catch(exception& e) {
936                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
937                 exit(1);
938         }
939 }
940
941 /**************************************************************************************************/
942
943 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
944         try {
945                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
946                 //set file positions for fasta file
947                 fastaFilePos = m->divideFile(filename, processors);
948                 
949                 if (qfilename == "") { return processors; }
950                 
951                 //get name of first sequence in each chunk
952                 map<string, int> firstSeqNames;
953                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
954                         ifstream in;
955                         m->openInputFile(filename, in);
956                         in.seekg(fastaFilePos[i]);
957                 
958                         Sequence temp(in); 
959                         firstSeqNames[temp.getName()] = i;
960                 
961                         in.close();
962                 }
963                                 
964                 //seach for filePos of each first name in the qfile and save in qfileFilePos
965                 ifstream inQual;
966                 m->openInputFile(qfilename, inQual);
967                 
968                 string input;
969                 while(!inQual.eof()){   
970                         input = m->getline(inQual);
971
972                         if (input.length() != 0) {
973                                 if(input[0] == '>'){ //this is a sequence name line
974                                         istringstream nameStream(input);
975                                         
976                                         string sname = "";  nameStream >> sname;
977                                         sname = sname.substr(1);
978                                         
979                                         map<string, int>::iterator it = firstSeqNames.find(sname);
980                                         
981                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
982                                                 unsigned long long pos = inQual.tellg(); 
983                                                 qfileFilePos.push_back(pos - input.length() - 1);       
984                                                 firstSeqNames.erase(it);
985                                         }
986                                 }
987                         }
988                         
989                         if (firstSeqNames.size() == 0) { break; }
990                 }
991                 inQual.close();
992                 
993                 
994                 if (firstSeqNames.size() != 0) { 
995                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
996                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
997                         }
998                         qFileName = "";
999                         return processors;
1000                 }
1001
1002                 //get last file position of qfile
1003                 FILE * pFile;
1004                 unsigned long long size;
1005                 
1006                 //get num bytes in file
1007                 pFile = fopen (qfilename.c_str(),"rb");
1008                 if (pFile==NULL) perror ("Error opening file");
1009                 else{
1010                         fseek (pFile, 0, SEEK_END);
1011                         size=ftell (pFile);
1012                         fclose (pFile);
1013                 }
1014                 
1015                 qfileFilePos.push_back(size);
1016                 
1017                 return processors;
1018                 
1019                 #else
1020                 
1021                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1022                         //get last file position of fastafile
1023                         FILE * pFile;
1024                         unsigned long long size;
1025                         
1026                         //get num bytes in file
1027                         pFile = fopen (filename.c_str(),"rb");
1028                         if (pFile==NULL) perror ("Error opening file");
1029                         else{
1030                                 fseek (pFile, 0, SEEK_END);
1031                                 size=ftell (pFile);
1032                                 fclose (pFile);
1033                         }
1034                         fastaFilePos.push_back(size);
1035                         
1036                         //get last file position of fastafile
1037                         FILE * qFile;
1038                         
1039                         //get num bytes in file
1040                         qFile = fopen (qfilename.c_str(),"rb");
1041                         if (qFile==NULL) perror ("Error opening file");
1042                         else{
1043                                 fseek (qFile, 0, SEEK_END);
1044                                 size=ftell (qFile);
1045                                 fclose (qFile);
1046                         }
1047                         qfileFilePos.push_back(size);
1048                 
1049                         return 1;
1050                 
1051                 #endif
1052         }
1053         catch(exception& e) {
1054                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1055                 exit(1);
1056         }
1057 }
1058
1059 //***************************************************************************************************************
1060
1061 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1062         try {
1063                 ifstream inOligos;
1064                 m->openInputFile(oligoFile, inOligos);
1065                 
1066                 ofstream test;
1067                 
1068                 string type, oligo, group;
1069
1070                 int indexPrimer = 0;
1071                 int indexBarcode = 0;
1072                 
1073                 while(!inOligos.eof()){
1074
1075                         inOligos >> type; 
1076                                         
1077                         if(type[0] == '#'){
1078                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1079                                 m->gobble(inOligos);
1080                         }
1081                         else{
1082                                 m->gobble(inOligos);
1083                                 //make type case insensitive
1084                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1085                                 
1086                                 inOligos >> oligo;
1087                                 
1088                                 for(int i=0;i<oligo.length();i++){
1089                                         oligo[i] = toupper(oligo[i]);
1090                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1091                                 }
1092                                 
1093                                 if(type == "FORWARD"){
1094                                         group = "";
1095                                         
1096                                         // get rest of line in case there is a primer name
1097                                         while (!inOligos.eof()) {       
1098                                                 char c = inOligos.get(); 
1099                                                 if (c == 10 || c == 13){        break;  }
1100                                                 else if (c == 32 || c == 9){;} //space or tab
1101                                                 else {  group += c;  }
1102                                         } 
1103                                         
1104                                         //check for repeat barcodes
1105                                         map<string, int>::iterator itPrime = primers.find(oligo);
1106                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1107                                         
1108                                         primers[oligo]=indexPrimer; indexPrimer++;              
1109                                         primerNameVector.push_back(group);
1110                                 }
1111                                 else if(type == "REVERSE"){
1112                                         Sequence oligoRC("reverse", oligo);
1113                                         oligoRC.reverseComplement();
1114                                         revPrimer.push_back(oligoRC.getUnaligned());
1115                                 }
1116                                 else if(type == "BARCODE"){
1117                                         inOligos >> group;
1118                                         
1119                                         //check for repeat barcodes
1120                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1121                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1122                                                 
1123                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1124                                         barcodeNameVector.push_back(group);
1125                                 }
1126                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1127                         }
1128                         m->gobble(inOligos);
1129                 }       
1130                 inOligos.close();
1131                 
1132                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1133                 
1134                 //add in potential combos
1135                 if(barcodeNameVector.size() == 0){
1136                         barcodes[""] = 0;
1137                         barcodeNameVector.push_back("");                        
1138                 }
1139                 
1140                 if(primerNameVector.size() == 0){
1141                         primers[""] = 0;
1142                         primerNameVector.push_back("");                 
1143                 }
1144                 
1145                 fastaFileNames.resize(barcodeNameVector.size());
1146                 for(int i=0;i<fastaFileNames.size();i++){
1147                         fastaFileNames[i].assign(primerNameVector.size(), "");
1148                 }
1149                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1150                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1151                 
1152                 if(allFiles){
1153                         set<string> uniqueNames; //used to cleanup outputFileNames
1154                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1155                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1156                                         
1157                                         string primerName = primerNameVector[itPrimer->second];
1158                                         string barcodeName = barcodeNameVector[itBar->second];
1159                                         
1160                                         string comboGroupName = "";
1161                                         string fastaFileName = "";
1162                                         string qualFileName = "";
1163                                         string nameFileName = "";
1164                                         
1165                                         if(primerName == ""){
1166                                                 comboGroupName = barcodeNameVector[itBar->second];
1167                                         }
1168                                         else{
1169                                                 if(barcodeName == ""){
1170                                                         comboGroupName = primerNameVector[itPrimer->second];
1171                                                 }
1172                                                 else{
1173                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1174                                                 }
1175                                         }
1176                                         
1177                                         
1178                                         ofstream temp;
1179                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1180                                         if (uniqueNames.count(fastaFileName) == 0) {
1181                                                 outputNames.push_back(fastaFileName);
1182                                                 outputTypes["fasta"].push_back(fastaFileName);
1183                                                 uniqueNames.insert(fastaFileName);
1184                                         }
1185                                         
1186                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1187                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1188                                         
1189                                         if(qFileName != ""){
1190                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1191                                                 if (uniqueNames.count(qualFileName) == 0) {
1192                                                         outputNames.push_back(qualFileName);
1193                                                         outputTypes["qfile"].push_back(qualFileName);
1194                                                 }
1195                                                 
1196                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1197                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1198                                         }
1199                                         
1200                                         if(nameFile != ""){
1201                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1202                                                 if (uniqueNames.count(nameFileName) == 0) {
1203                                                         outputNames.push_back(nameFileName);
1204                                                         outputTypes["name"].push_back(nameFileName);
1205                                                 }
1206                                                 
1207                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1208                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1209                                         }
1210                                         
1211                                 }
1212                         }
1213                 }
1214                 numFPrimers = primers.size();
1215                 numRPrimers = revPrimer.size();
1216                 
1217                 bool allBlank = true;
1218                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1219                         if (barcodeNameVector[i] != "") {
1220                                 allBlank = false;
1221                                 break;
1222                         }
1223                 }
1224                 for (int i = 0; i < primerNameVector.size(); i++) {
1225                         if (primerNameVector[i] != "") {
1226                                 allBlank = false;
1227                                 break;
1228                         }
1229                 }
1230                 
1231                 if (allBlank) {
1232                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1233                         allFiles = false;
1234                         return false;
1235                 }
1236                 
1237                 return true;
1238                 
1239         }
1240         catch(exception& e) {
1241                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1242                 exit(1);
1243         }
1244 }
1245 //***************************************************************************************************************
1246
1247 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1248         try {
1249                 bool success = 1;
1250                 if(qscores.getName() != ""){
1251                         qscores.trimQScores(-1, keepFirst);
1252                 }
1253                 sequence.trim(keepFirst);
1254                 return success;
1255         }
1256         catch(exception& e) {
1257                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1258                 exit(1);
1259         }
1260         
1261 }       
1262
1263 //***************************************************************************************************************
1264
1265 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1266         try {
1267                 bool success = 0;
1268                 
1269                 int length = sequence.getNumBases() - removeLast;
1270                 
1271                 if(length > 0){
1272                         if(qscores.getName() != ""){
1273                                 qscores.trimQScores(-1, length);
1274                         }
1275                         sequence.trim(length);
1276                         success = 1;
1277                 }
1278                 else{
1279                         success = 0;
1280                 }
1281
1282                 return success;
1283         }
1284         catch(exception& e) {
1285                 m->errorOut(e, "removeLastTrim", "countDiffs");
1286                 exit(1);
1287         }
1288         
1289 }       
1290
1291 //***************************************************************************************************************
1292
1293 bool TrimSeqsCommand::cullLength(Sequence& seq){
1294         try {
1295         
1296                 int length = seq.getNumBases();
1297                 bool success = 0;       //guilty until proven innocent
1298                 
1299                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1300                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1301                 else                                                                                            {       success = 0;    }
1302                 
1303                 return success;
1304         
1305         }
1306         catch(exception& e) {
1307                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1308                 exit(1);
1309         }
1310         
1311 }
1312
1313 //***************************************************************************************************************
1314
1315 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1316         try {
1317                 int longHomoP = seq.getLongHomoPolymer();
1318                 bool success = 0;       //guilty until proven innocent
1319                 
1320                 if(longHomoP <= maxHomoP){      success = 1;    }
1321                 else                                    {       success = 0;    }
1322                 
1323                 return success;
1324         }
1325         catch(exception& e) {
1326                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1327                 exit(1);
1328         }
1329         
1330 }
1331
1332 //***************************************************************************************************************
1333
1334 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1335         try {
1336                 int numNs = seq.getAmbigBases();
1337                 bool success = 0;       //guilty until proven innocent
1338                 
1339                 if(numNs <= maxAmbig)   {       success = 1;    }
1340                 else                                    {       success = 0;    }
1341                 
1342                 return success;
1343         }
1344         catch(exception& e) {
1345                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1346                 exit(1);
1347         }
1348         
1349 }
1350 //***************************************************************************************************************