]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
fixed bug with shhh.flow from file path name in write functions, added "smart" featur...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
29                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
30                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
31                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
32                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
33                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
34                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
35                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
36                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
37                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
38                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
39                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                         
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string TrimSeqsCommand::getHelpString(){        
54         try {
55                 string helpString = "";
56                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
57                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
58                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
59                 helpString += "The fasta parameter is required.\n";
60                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
61                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
62                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
63                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
64                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
65                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
66                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
67                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70                 helpString += "The qfile parameter allows you to provide a quality file.\n";
71                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
72                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
73                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
74                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
75                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
76                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
77                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
78                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
79                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
80                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
81                 helpString += "The trim.seqs command should be in the following format: \n";
82                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
83                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
84                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
85                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
86                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87                 return helpString;
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
91                 exit(1);
92         }
93 }
94
95
96 //**********************************************************************************************************************
97
98 TrimSeqsCommand::TrimSeqsCommand(){     
99         try {
100                 abort = true; calledHelp = true; 
101                 setParameters();
102                 vector<string> tempOutNames;
103                 outputTypes["fasta"] = tempOutNames;
104                 outputTypes["qfile"] = tempOutNames;
105                 outputTypes["group"] = tempOutNames;
106                 outputTypes["name"] = tempOutNames;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
110                 exit(1);
111         }
112 }
113 //***************************************************************************************************************
114
115 TrimSeqsCommand::TrimSeqsCommand(string option)  {
116         try {
117                 
118                 abort = false; calledHelp = false;   
119                 comboStarts = 0;
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter;
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         //initialize outputTypes
140                         vector<string> tempOutNames;
141                         outputTypes["fasta"] = tempOutNames;
142                         outputTypes["qfile"] = tempOutNames;
143                         outputTypes["group"] = tempOutNames;
144                         outputTypes["name"] = tempOutNames;
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
148                         if (inputDir == "not found"){   inputDir = "";          }
149                         else {
150                                 string path;
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("oligos");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
165                                 }
166                                 
167                                 it = parameters.find("qfile");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
173                                 }
174                                 
175                                 it = parameters.find("name");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
181                                 }
182                                 
183                         }
184
185                         
186                         //check for required parameters
187                         fastaFile = validParameter.validFile(parameters, "fasta", true);
188                         if (fastaFile == "not found") {                                 
189                                 fastaFile = m->getFastaFile(); 
190                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
191                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
192                         }else if (fastaFile == "not open") { abort = true; }    
193                         else { m->setFastaFile(fastaFile); }
194                         
195                         //if the user changes the output directory command factory will send this info to us in the output parameter 
196                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
197                                 outputDir = ""; 
198                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
199                         }
200                 
201                         
202                         //check for optional parameter and set defaults
203                         // ...at some point should added some additional type checking...
204                         string temp;
205                         temp = validParameter.validFile(parameters, "flip", false);
206                         if (temp == "not found")    {   flip = 0;       }
207                         else {  flip = m->isTrue(temp);         }
208                 
209                         temp = validParameter.validFile(parameters, "oligos", true);
210                         if (temp == "not found"){       oligoFile = "";         }
211                         else if(temp == "not open"){    abort = true;   } 
212                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
213                         
214                         
215                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
216                         m->mothurConvert(temp, maxAmbig);  
217
218                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
219                         m->mothurConvert(temp, maxHomoP);  
220
221                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
222                         m->mothurConvert(temp, minLength); 
223                         
224                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxLength);
226                         
227                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, bdiffs);
229                         
230                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, pdiffs);
232                         
233                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
234                         m->mothurConvert(temp, tdiffs);
235                         
236                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
237                         
238                         temp = validParameter.validFile(parameters, "qfile", true);     
239                         if (temp == "not found")        {       qFileName = "";         }
240                         else if(temp == "not open")     {       abort = true;           }
241                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
242                         
243                         temp = validParameter.validFile(parameters, "name", true);      
244                         if (temp == "not found")        {       nameFile = "";          }
245                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
246                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
247                         
248                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
249                         m->mothurConvert(temp, qThreshold);
250                         
251                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
252                         qtrim = m->isTrue(temp);
253
254                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
255                         convert(temp, qRollAverage);
256
257                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
258                         convert(temp, qWindowAverage);
259
260                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
261                         convert(temp, qWindowSize);
262
263                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
264                         convert(temp, qWindowStep);
265
266                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
267                         convert(temp, qAverage);
268
269                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
270                         convert(temp, keepFirst);
271
272                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
273                         convert(temp, removeLast);
274                         
275                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
276                         allFiles = m->isTrue(temp);
277                         
278                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
279                         m->setProcessors(temp);
280                         m->mothurConvert(temp, processors); 
281                         
282                         
283                         if(allFiles && (oligoFile == "")){
284                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
285                         }
286                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
287                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
288                                 qAverage=0;
289                                 qThreshold=0;
290                         }
291                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
292                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
293                                 abort = true;
294                         }
295                         
296                         if (nameFile == "") {
297                                 vector<string> files; files.push_back(fastaFile);
298                                 parser.getNameFile(files);
299                         }
300                 }
301
302         }
303         catch(exception& e) {
304                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
305                 exit(1);
306         }
307 }
308 //***************************************************************************************************************
309
310 int TrimSeqsCommand::execute(){
311         try{
312         
313                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
314                 
315                 numFPrimers = 0;  //this needs to be initialized
316                 numRPrimers = 0;
317                 createGroup = false;
318                 vector<vector<string> > fastaFileNames;
319                 vector<vector<string> > qualFileNames;
320                 vector<vector<string> > nameFileNames;
321                 
322                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
323                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
324                 
325                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
326                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
327                 
328                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
329                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
330                 
331                 if (qFileName != "") {
332                         outputNames.push_back(trimQualFile);
333                         outputNames.push_back(scrapQualFile);
334                         outputTypes["qfile"].push_back(trimQualFile);
335                         outputTypes["qfile"].push_back(scrapQualFile); 
336                 }
337                 
338                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
339                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
340                 
341                 if (nameFile != "") {
342                         m->readNames(nameFile, nameMap);
343                         outputNames.push_back(trimNameFile);
344                         outputNames.push_back(scrapNameFile);
345                         outputTypes["name"].push_back(trimNameFile);
346                         outputTypes["name"].push_back(scrapNameFile); 
347                 }
348                 
349                 if (m->control_pressed) { return 0; }
350                 
351                 string outputGroupFileName;
352                 if(oligoFile != ""){
353                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
354                         if (createGroup) {
355                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
356                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
357                         }
358                 }
359                 
360                 vector<unsigned long long> fastaFilePos;
361                 vector<unsigned long long> qFilePos;
362                 
363                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
364                 
365                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
366                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
367                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
368                 }       
369                 if(qFileName == "")     {       qLines = lines; } //files with duds
370                 
371                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
372                                 if(processors == 1){
373                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
374                                 }else{
375                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
376                                 }       
377                 #else
378                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
379                 #endif
380                 
381                 if (m->control_pressed) {  return 0; }                  
382         
383                 if(allFiles){
384                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
385                         map<string, string>::iterator it;
386                         set<string> namesToRemove;
387                         for(int i=0;i<fastaFileNames.size();i++){
388                                 for(int j=0;j<fastaFileNames[0].size();j++){
389                                         if (fastaFileNames[i][j] != "") {
390                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
391                                                         if(m->isBlank(fastaFileNames[i][j])){
392                                                                 m->mothurRemove(fastaFileNames[i][j]);
393                                                                 namesToRemove.insert(fastaFileNames[i][j]);
394                                                         
395                                                                 if(qFileName != ""){
396                                                                         m->mothurRemove(qualFileNames[i][j]);
397                                                                         namesToRemove.insert(qualFileNames[i][j]);
398                                                                 }
399                                                                 
400                                                                 if(nameFile != ""){
401                                                                         m->mothurRemove(nameFileNames[i][j]);
402                                                                         namesToRemove.insert(nameFileNames[i][j]);
403                                                                 }
404                                                         }else{  
405                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
406                                                                 if (it == uniqueFastaNames.end()) {     
407                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
408                                                                 }       
409                                                         }
410                                                 }
411                                         }
412                                 }
413                         }
414                         
415                         //remove names for outputFileNames, just cleans up the output
416                         vector<string> outputNames2;
417                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
418                         outputNames = outputNames2;
419                         
420                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
421                                 ifstream in;
422                                 m->openInputFile(it->first, in);
423                                 
424                                 ofstream out;
425                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
426                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
427                                 m->openOutputFile(thisGroupName, out);
428                                 
429                                 while (!in.eof()){
430                                         if (m->control_pressed) { break; }
431                                         
432                                         Sequence currSeq(in); m->gobble(in);
433                                         out << currSeq.getName() << '\t' << it->second << endl;
434                                 }
435                                 in.close();
436                                 out.close();
437                         }
438                 }
439                 
440                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
441
442                 //output group counts
443                 m->mothurOutEndLine();
444                 int total = 0;
445                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
446                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
447                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
448                 }
449                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
450                 
451                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
452
453                 //set fasta file as new current fastafile
454                 string current = "";
455                 itTypes = outputTypes.find("fasta");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
458                 }
459                 
460                 itTypes = outputTypes.find("name");
461                 if (itTypes != outputTypes.end()) {
462                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
463                 }
464                 
465                 itTypes = outputTypes.find("qfile");
466                 if (itTypes != outputTypes.end()) {
467                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
468                 }
469                 
470                 itTypes = outputTypes.find("group");
471                 if (itTypes != outputTypes.end()) {
472                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
473                 }
474
475                 m->mothurOutEndLine();
476                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
477                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
478                 m->mothurOutEndLine();
479                 
480                 return 0;       
481                         
482         }
483         catch(exception& e) {
484                 m->errorOut(e, "TrimSeqsCommand", "execute");
485                 exit(1);
486         }
487 }
488                 
489 /**************************************************************************************/
490
491 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
492                 
493         try {
494                 
495                 ofstream trimFASTAFile;
496                 m->openOutputFile(trimFileName, trimFASTAFile);
497                 
498                 ofstream scrapFASTAFile;
499                 m->openOutputFile(scrapFileName, scrapFASTAFile);
500                 
501                 ofstream trimQualFile;
502                 ofstream scrapQualFile;
503                 if(qFileName != ""){
504                         m->openOutputFile(trimQFileName, trimQualFile);
505                         m->openOutputFile(scrapQFileName, scrapQualFile);
506                 }
507                 
508                 ofstream trimNameFile;
509                 ofstream scrapNameFile;
510                 if(nameFile != ""){
511                         m->openOutputFile(trimNFileName, trimNameFile);
512                         m->openOutputFile(scrapNFileName, scrapNameFile);
513                 }
514                 
515                 
516                 ofstream outGroupsFile;
517                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
518                 if(allFiles){
519                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
520                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
521                                         if (fastaFileNames[i][j] != "") {
522                                                 ofstream temp;
523                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
524                                                 if(qFileName != ""){
525                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
526                                                 }
527                                                 
528                                                 if(nameFile != ""){
529                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
530                                                 }
531                                         }
532                                 }
533                         }
534                 }
535                 
536                 ifstream inFASTA;
537                 m->openInputFile(filename, inFASTA);
538                 inFASTA.seekg(line->start);
539                 
540                 ifstream qFile;
541                 if(qFileName != "")     {
542                         m->openInputFile(qFileName, qFile);
543                         qFile.seekg(qline->start);  
544                 }
545                 
546                 int count = 0;
547                 bool moreSeqs = 1;
548                 TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
549         
550                 while (moreSeqs) {
551                                 
552                         if (m->control_pressed) { 
553                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
554                                 if (createGroup) {       outGroupsFile.close();   }
555
556                                 if(qFileName != ""){
557                                         qFile.close();
558                                 }
559                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
560
561                                 return 0;
562                         }
563                         
564                         int success = 1;
565                         string trashCode = "";
566                         int currentSeqsDiffs = 0;
567
568                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
569                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
570                         QualityScores currQual;
571                         if(qFileName != ""){
572                                 currQual = QualityScores(qFile);  m->gobble(qFile);
573                         }
574                         
575                         string origSeq = currSeq.getUnaligned();
576                         if (origSeq != "") {
577                                 
578                                 int barcodeIndex = 0;
579                                 int primerIndex = 0;
580                                 
581                                 if(barcodes.size() != 0){
582                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
583                                         if(success > bdiffs)            {       trashCode += 'b';       }
584                                         else{ currentSeqsDiffs += success;  }
585                                 }
586                                 
587                                 if(numFPrimers != 0){
588                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex);
589                                         if(success > pdiffs)            {       trashCode += 'f';       }
590                                         else{ currentSeqsDiffs += success;  }
591                                 }
592                                 
593                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
594                                 
595                                 if(numRPrimers != 0){
596                                         success = trimOligos.stripReverse(currSeq, currQual);
597                                         if(!success)                            {       trashCode += 'r';       }
598                                 }
599
600                                 if(keepFirst != 0){
601                                         success = keepFirstTrim(currSeq, currQual);
602                                 }
603                                 
604                                 if(removeLast != 0){
605                                         success = removeLastTrim(currSeq, currQual);
606                                         if(!success)                            {       trashCode += 'l';       }
607                                 }
608
609                                 
610                                 if(qFileName != ""){
611                                         int origLength = currSeq.getNumBases();
612                                         
613                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
614                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
615                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
616                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
617                                         else                                            {       success = 1;                            }
618                                         
619                                         //you don't want to trim, if it fails above then scrap it
620                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
621                                         
622                                         if(!success)                            {       trashCode += 'q';       }
623                                 }                               
624                 
625                                 if(minLength > 0 || maxLength > 0){
626                                         success = cullLength(currSeq);
627                                         if(!success)                            {       trashCode += 'l';       }
628                                 }
629                                 if(maxHomoP > 0){
630                                         success = cullHomoP(currSeq);
631                                         if(!success)                            {       trashCode += 'h';       }
632                                 }
633                                 if(maxAmbig != -1){
634                                         success = cullAmbigs(currSeq);
635                                         if(!success)                            {       trashCode += 'n';       }
636                                 }
637                                 
638                                 if(flip){               // should go last                       
639                                         currSeq.reverseComplement();
640                                         if(qFileName != ""){
641                                                 currQual.flipQScores(); 
642                                         }
643                                 }
644                                 
645                                 if(trashCode.length() == 0){
646                                         currSeq.setAligned(currSeq.getUnaligned());
647                                         currSeq.printSequence(trimFASTAFile);
648                                         
649                                         if(qFileName != ""){
650                                                 currQual.printQScores(trimQualFile);
651                                         }
652                                         
653                                         if(nameFile != ""){
654                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
655                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
656                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
657                                         }
658                                         
659                                         if (createGroup) {
660                                                 if(barcodes.size() != 0){
661                                                         string thisGroup = barcodeNameVector[barcodeIndex];
662                                                         if (primers.size() != 0) { 
663                                                                 if (primerNameVector[primerIndex] != "") { 
664                                                                         if(thisGroup != "") {
665                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
666                                                                         }else {
667                                                                                 thisGroup = primerNameVector[primerIndex]; 
668                                                                         }
669                                                                 } 
670                                                         }
671                                                         
672                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
673                                                         
674                                                         if (nameFile != "") {
675                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
676                                                                 if (itName != nameMap.end()) { 
677                                                                         vector<string> thisSeqsNames; 
678                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
679                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
680                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
681                                                                         }
682                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
683                                                         }
684                                                         
685                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
686                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
687                                                         else { groupCounts[it->first]++; }
688                                                                 
689                                                 }
690                                         }
691                                         
692                                         if(allFiles){
693                                                 ofstream output;
694                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
695                                                 currSeq.printSequence(output);
696                                                 output.close();
697                                                 
698                                                 if(qFileName != ""){
699                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
700                                                         currQual.printQScores(output);
701                                                         output.close();                                                 
702                                                 }
703                                                 
704                                                 if(nameFile != ""){
705                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
706                                                         if (itName != nameMap.end()) { 
707                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
708                                                                 output << itName->first << '\t' << itName->second << endl; 
709                                                                 output.close();
710                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
711                                                 }
712                                         }
713                                 }
714                                 else{
715                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
716                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
717                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
718                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
719                                         }
720                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
721                                         currSeq.setUnaligned(origSeq);
722                                         currSeq.setAligned(origSeq);
723                                         currSeq.printSequence(scrapFASTAFile);
724                                         if(qFileName != ""){
725                                                 currQual.printQScores(scrapQualFile);
726                                         }
727                                 }
728                                 count++;
729                         }
730                         
731                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
732                                 unsigned long long pos = inFASTA.tellg();
733                                 if ((pos == -1) || (pos >= line->end)) { break; }
734                         
735                         #else
736                                 if (inFASTA.eof()) { break; }
737                         #endif
738                         
739                         //report progress
740                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
741                         
742                 }
743                 //report progress
744                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
745                 
746                 
747                 inFASTA.close();
748                 trimFASTAFile.close();
749                 scrapFASTAFile.close();
750                 if (createGroup) {       outGroupsFile.close();   }
751                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
752                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
753                 
754                 return count;
755         }
756         catch(exception& e) {
757                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
758                 exit(1);
759         }
760 }
761
762 /**************************************************************************************************/
763
764 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
765         try {
766 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
767                 int process = 1;
768                 int exitCommand = 1;
769                 processIDS.clear();
770                 
771                 //loop through and create all the processes you want
772                 while (process != processors) {
773                         int pid = fork();
774                         
775                         if (pid > 0) {
776                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
777                                 process++;
778                         }else if (pid == 0){
779                                 
780                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
781                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
782                                 vector<vector<string> > tempNameFileNames = nameFileNames;
783
784                                 if(allFiles){
785                                         ofstream temp;
786
787                                         for(int i=0;i<tempFASTAFileNames.size();i++){
788                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
789                                                         if (tempFASTAFileNames[i][j] != "") {
790                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
791                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
792
793                                                                 if(qFileName != ""){
794                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
795                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
796                                                                 }
797                                                                 if(nameFile != ""){
798                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
799                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
800                                                                 }
801                                                         }
802                                                 }
803                                         }
804                                 }
805                                                         
806                                 driverCreateTrim(filename,
807                                                                  qFileName,
808                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
809                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
810                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
811                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
812                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
813                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
814                                                                  (groupFile + toString(getpid()) + ".temp"),
815                                                                  tempFASTAFileNames,
816                                                                  tempPrimerQualFileNames,
817                                                                  tempNameFileNames,
818                                                                  lines[process],
819                                                                  qLines[process]);
820                                 
821                                 //pass groupCounts to parent
822                                 if(createGroup){
823                                         ofstream out;
824                                         string tempFile = filename + toString(getpid()) + ".num.temp";
825                                         m->openOutputFile(tempFile, out);
826                                         
827                                         out << groupCounts.size() << endl;
828                                         
829                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
830                                                 out << it->first << '\t' << it->second << endl;
831                                         }
832                                         out.close();
833                                 }
834                                 exit(0);
835                         }else { 
836                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
837                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
838                                 exit(0);
839                         }
840                 }
841                 
842                 //parent do my part
843                 ofstream temp;
844                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
845                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
846                 if(qFileName != ""){
847                         m->openOutputFile(trimQualFileName, temp);              temp.close();
848                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
849                 }
850                 if (nameFile != "") {
851                         m->openOutputFile(trimNameFileName, temp);              temp.close();
852                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
853                 }
854
855                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
856                 
857                 //force parent to wait until all the processes are done
858                 for (int i=0;i<processIDS.size();i++) { 
859                         int temp = processIDS[i];
860                         wait(&temp);
861                 }
862                 
863                 //append files
864                 for(int i=0;i<processIDS.size();i++){
865                         
866                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
867                         
868                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
869                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
870                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
871                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
872                         
873                         if(qFileName != ""){
874                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
875                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
876                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
877                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
878                         }
879                         
880                         if(nameFile != ""){
881                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
882                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
883                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
884                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
885                         }
886                         
887                         if(createGroup){
888                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
889                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
890                         }
891                         
892                         
893                         if(allFiles){
894                                 for(int j=0;j<fastaFileNames.size();j++){
895                                         for(int k=0;k<fastaFileNames[j].size();k++){
896                                                 if (fastaFileNames[j][k] != "") {
897                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
898                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
899                                                         
900                                                         if(qFileName != ""){
901                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
902                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
903                                                         }
904                                                         
905                                                         if(nameFile != ""){
906                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
907                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
908                                                         }
909                                                 }
910                                         }
911                                 }
912                         }
913                         
914                         if(createGroup){
915                                 ifstream in;
916                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
917                                 m->openInputFile(tempFile, in);
918                                 int tempNum;
919                                 string group;
920                                 
921                                 in >> tempNum; m->gobble(in);
922                                 
923                                 if (tempNum != 0) {
924                                         while (!in.eof()) { 
925                                                 in >> group >> tempNum; m->gobble(in);
926                                 
927                                                 map<string, int>::iterator it = groupCounts.find(group);
928                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
929                                                 else { groupCounts[it->first] += tempNum; }
930                                         }
931                                 }
932                                 in.close(); m->mothurRemove(tempFile);
933                         }
934                         
935                 }
936         
937                 return exitCommand;
938 #endif          
939         }
940         catch(exception& e) {
941                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
942                 exit(1);
943         }
944 }
945
946 /**************************************************************************************************/
947
948 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
949         try {
950                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
951                 //set file positions for fasta file
952                 fastaFilePos = m->divideFile(filename, processors);
953                 
954                 if (qfilename == "") { return processors; }
955                 
956                 //get name of first sequence in each chunk
957                 map<string, int> firstSeqNames;
958                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
959                         ifstream in;
960                         m->openInputFile(filename, in);
961                         in.seekg(fastaFilePos[i]);
962                 
963                         Sequence temp(in); 
964                         firstSeqNames[temp.getName()] = i;
965                 
966                         in.close();
967                 }
968                                 
969                 //seach for filePos of each first name in the qfile and save in qfileFilePos
970                 ifstream inQual;
971                 m->openInputFile(qfilename, inQual);
972                 
973                 string input;
974                 while(!inQual.eof()){   
975                         input = m->getline(inQual);
976
977                         if (input.length() != 0) {
978                                 if(input[0] == '>'){ //this is a sequence name line
979                                         istringstream nameStream(input);
980                                         
981                                         string sname = "";  nameStream >> sname;
982                                         sname = sname.substr(1);
983                                         
984                                         map<string, int>::iterator it = firstSeqNames.find(sname);
985                                         
986                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
987                                                 unsigned long long pos = inQual.tellg(); 
988                                                 qfileFilePos.push_back(pos - input.length() - 1);       
989                                                 firstSeqNames.erase(it);
990                                         }
991                                 }
992                         }
993                         
994                         if (firstSeqNames.size() == 0) { break; }
995                 }
996                 inQual.close();
997                 
998                 
999                 if (firstSeqNames.size() != 0) { 
1000                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1001                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1002                         }
1003                         qFileName = "";
1004                         return processors;
1005                 }
1006
1007                 //get last file position of qfile
1008                 FILE * pFile;
1009                 unsigned long long size;
1010                 
1011                 //get num bytes in file
1012                 pFile = fopen (qfilename.c_str(),"rb");
1013                 if (pFile==NULL) perror ("Error opening file");
1014                 else{
1015                         fseek (pFile, 0, SEEK_END);
1016                         size=ftell (pFile);
1017                         fclose (pFile);
1018                 }
1019                 
1020                 qfileFilePos.push_back(size);
1021                 
1022                 return processors;
1023                 
1024                 #else
1025                 
1026                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1027                         fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1028                         return 1;
1029                 
1030                 #endif
1031         }
1032         catch(exception& e) {
1033                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1034                 exit(1);
1035         }
1036 }
1037
1038 //***************************************************************************************************************
1039
1040 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1041         try {
1042                 ifstream inOligos;
1043                 m->openInputFile(oligoFile, inOligos);
1044                 
1045                 ofstream test;
1046                 
1047                 string type, oligo, group;
1048
1049                 int indexPrimer = 0;
1050                 int indexBarcode = 0;
1051                 
1052                 while(!inOligos.eof()){
1053
1054                         inOligos >> type; 
1055                                         
1056                         if(type[0] == '#'){
1057                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1058                                 m->gobble(inOligos);
1059                         }
1060                         else{
1061                                 m->gobble(inOligos);
1062                                 //make type case insensitive
1063                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1064                                 
1065                                 inOligos >> oligo;
1066                                 
1067                                 for(int i=0;i<oligo.length();i++){
1068                                         oligo[i] = toupper(oligo[i]);
1069                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1070                                 }
1071                                 
1072                                 if(type == "FORWARD"){
1073                                         group = "";
1074                                         
1075                                         // get rest of line in case there is a primer name
1076                                         while (!inOligos.eof()) {       
1077                                                 char c = inOligos.get(); 
1078                                                 if (c == 10 || c == 13){        break;  }
1079                                                 else if (c == 32 || c == 9){;} //space or tab
1080                                                 else {  group += c;  }
1081                                         } 
1082                                         
1083                                         //check for repeat barcodes
1084                                         map<string, int>::iterator itPrime = primers.find(oligo);
1085                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1086                                         
1087                                         primers[oligo]=indexPrimer; indexPrimer++;              
1088                                         primerNameVector.push_back(group);
1089                                 }
1090                                 else if(type == "REVERSE"){
1091                                         Sequence oligoRC("reverse", oligo);
1092                                         oligoRC.reverseComplement();
1093                                         revPrimer.push_back(oligoRC.getUnaligned());
1094                                 }
1095                                 else if(type == "BARCODE"){
1096                                         inOligos >> group;
1097                                         
1098                                         //check for repeat barcodes
1099                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1100                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1101                                                 
1102                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1103                                         barcodeNameVector.push_back(group);
1104                                 }
1105                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1106                         }
1107                         m->gobble(inOligos);
1108                 }       
1109                 inOligos.close();
1110                 
1111                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1112                 
1113                 //add in potential combos
1114                 if(barcodeNameVector.size() == 0){
1115                         barcodes[""] = 0;
1116                         barcodeNameVector.push_back("");                        
1117                 }
1118                 
1119                 if(primerNameVector.size() == 0){
1120                         primers[""] = 0;
1121                         primerNameVector.push_back("");                 
1122                 }
1123                 
1124                 fastaFileNames.resize(barcodeNameVector.size());
1125                 for(int i=0;i<fastaFileNames.size();i++){
1126                         fastaFileNames[i].assign(primerNameVector.size(), "");
1127                 }
1128                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1129                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1130                 
1131                 if(allFiles){
1132                         set<string> uniqueNames; //used to cleanup outputFileNames
1133                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1134                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1135                                         
1136                                         string primerName = primerNameVector[itPrimer->second];
1137                                         string barcodeName = barcodeNameVector[itBar->second];
1138                                         
1139                                         string comboGroupName = "";
1140                                         string fastaFileName = "";
1141                                         string qualFileName = "";
1142                                         string nameFileName = "";
1143                                         
1144                                         if(primerName == ""){
1145                                                 comboGroupName = barcodeNameVector[itBar->second];
1146                                         }
1147                                         else{
1148                                                 if(barcodeName == ""){
1149                                                         comboGroupName = primerNameVector[itPrimer->second];
1150                                                 }
1151                                                 else{
1152                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1153                                                 }
1154                                         }
1155                                         
1156                                         
1157                                         ofstream temp;
1158                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1159                                         if (uniqueNames.count(fastaFileName) == 0) {
1160                                                 outputNames.push_back(fastaFileName);
1161                                                 outputTypes["fasta"].push_back(fastaFileName);
1162                                                 uniqueNames.insert(fastaFileName);
1163                                         }
1164                                         
1165                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1166                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1167                                         
1168                                         if(qFileName != ""){
1169                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1170                                                 if (uniqueNames.count(qualFileName) == 0) {
1171                                                         outputNames.push_back(qualFileName);
1172                                                         outputTypes["qfile"].push_back(qualFileName);
1173                                                 }
1174                                                 
1175                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1176                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1177                                         }
1178                                         
1179                                         if(nameFile != ""){
1180                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1181                                                 if (uniqueNames.count(nameFileName) == 0) {
1182                                                         outputNames.push_back(nameFileName);
1183                                                         outputTypes["name"].push_back(nameFileName);
1184                                                 }
1185                                                 
1186                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1187                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1188                                         }
1189                                         
1190                                 }
1191                         }
1192                 }
1193                 numFPrimers = primers.size();
1194                 numRPrimers = revPrimer.size();
1195                 
1196                 bool allBlank = true;
1197                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1198                         if (barcodeNameVector[i] != "") {
1199                                 allBlank = false;
1200                                 break;
1201                         }
1202                 }
1203                 for (int i = 0; i < primerNameVector.size(); i++) {
1204                         if (primerNameVector[i] != "") {
1205                                 allBlank = false;
1206                                 break;
1207                         }
1208                 }
1209                 
1210                 if (allBlank) {
1211                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1212                         allFiles = false;
1213                         return false;
1214                 }
1215                 
1216                 return true;
1217                 
1218         }
1219         catch(exception& e) {
1220                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1221                 exit(1);
1222         }
1223 }
1224 //***************************************************************************************************************
1225
1226 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1227         try {
1228                 bool success = 1;
1229                 if(qscores.getName() != ""){
1230                         qscores.trimQScores(-1, keepFirst);
1231                 }
1232                 sequence.trim(keepFirst);
1233                 return success;
1234         }
1235         catch(exception& e) {
1236                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1237                 exit(1);
1238         }
1239         
1240 }       
1241
1242 //***************************************************************************************************************
1243
1244 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1245         try {
1246                 bool success = 0;
1247                 
1248                 int length = sequence.getNumBases() - removeLast;
1249                 
1250                 if(length > 0){
1251                         if(qscores.getName() != ""){
1252                                 qscores.trimQScores(-1, length);
1253                         }
1254                         sequence.trim(length);
1255                         success = 1;
1256                 }
1257                 else{
1258                         success = 0;
1259                 }
1260
1261                 return success;
1262         }
1263         catch(exception& e) {
1264                 m->errorOut(e, "removeLastTrim", "countDiffs");
1265                 exit(1);
1266         }
1267         
1268 }       
1269
1270 //***************************************************************************************************************
1271
1272 bool TrimSeqsCommand::cullLength(Sequence& seq){
1273         try {
1274         
1275                 int length = seq.getNumBases();
1276                 bool success = 0;       //guilty until proven innocent
1277                 
1278                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1279                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1280                 else                                                                                            {       success = 0;    }
1281                 
1282                 return success;
1283         
1284         }
1285         catch(exception& e) {
1286                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1287                 exit(1);
1288         }
1289         
1290 }
1291
1292 //***************************************************************************************************************
1293
1294 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1295         try {
1296                 int longHomoP = seq.getLongHomoPolymer();
1297                 bool success = 0;       //guilty until proven innocent
1298                 
1299                 if(longHomoP <= maxHomoP){      success = 1;    }
1300                 else                                    {       success = 0;    }
1301                 
1302                 return success;
1303         }
1304         catch(exception& e) {
1305                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1306                 exit(1);
1307         }
1308         
1309 }
1310
1311 //***************************************************************************************************************
1312
1313 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1314         try {
1315                 int numNs = seq.getAmbigBases();
1316                 bool success = 0;       //guilty until proven innocent
1317                 
1318                 if(numNs <= maxAmbig)   {       success = 1;    }
1319                 else                                    {       success = 0;    }
1320                 
1321                 return success;
1322         }
1323         catch(exception& e) {
1324                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1325                 exit(1);
1326         }
1327         
1328 }
1329 //***************************************************************************************************************