]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
removed unused copy constructors and comments within comments that where causing...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332                 createGroup = false;
333                 vector<vector<string> > fastaFileNames;
334                 vector<vector<string> > qualFileNames;
335                 vector<vector<string> > nameFileNames;
336                 
337                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
338                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
339                 
340                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
341                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
342                 
343                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
344                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
345                 
346                 if (qFileName != "") {
347                         outputNames.push_back(trimQualFile);
348                         outputNames.push_back(scrapQualFile);
349                         outputTypes["qfile"].push_back(trimQualFile);
350                         outputTypes["qfile"].push_back(scrapQualFile); 
351                 }
352                 
353                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
354                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
355                 
356                 if (nameFile != "") {
357                         m->readNames(nameFile, nameMap);
358                         outputNames.push_back(trimNameFile);
359                         outputNames.push_back(scrapNameFile);
360                         outputTypes["name"].push_back(trimNameFile);
361                         outputTypes["name"].push_back(scrapNameFile); 
362                 }
363                 
364                 if (m->control_pressed) { return 0; }
365                 
366                 string outputGroupFileName;
367                 if(oligoFile != ""){
368                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
369                         if (createGroup) {
370                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
371                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
372                         }
373                 }
374                 
375                 vector<unsigned long long> fastaFilePos;
376                 vector<unsigned long long> qFilePos;
377                 
378                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
379                 
380                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
381                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
382                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
383                 }       
384                 if(qFileName == "")     {       qLines = lines; } //files with duds
385                 
386                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
387                                 if(processors == 1){
388                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
389                                 }else{
390                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
391                                 }       
392                 #else
393                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
394                 #endif
395                 
396                 if (m->control_pressed) {  return 0; }                  
397         
398                 if(allFiles){
399                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
400                         map<string, string>::iterator it;
401                         set<string> namesToRemove;
402                         for(int i=0;i<fastaFileNames.size();i++){
403                                 for(int j=0;j<fastaFileNames[0].size();j++){
404                                         if (fastaFileNames[i][j] != "") {
405                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
406                                                         if(m->isBlank(fastaFileNames[i][j])){
407                                                                 m->mothurRemove(fastaFileNames[i][j]);
408                                                                 namesToRemove.insert(fastaFileNames[i][j]);
409                                                         
410                                                                 if(qFileName != ""){
411                                                                         m->mothurRemove(qualFileNames[i][j]);
412                                                                         namesToRemove.insert(qualFileNames[i][j]);
413                                                                 }
414                                                                 
415                                                                 if(nameFile != ""){
416                                                                         m->mothurRemove(nameFileNames[i][j]);
417                                                                         namesToRemove.insert(nameFileNames[i][j]);
418                                                                 }
419                                                         }else{  
420                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
421                                                                 if (it == uniqueFastaNames.end()) {     
422                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
423                                                                 }       
424                                                         }
425                                                 }
426                                         }
427                                 }
428                         }
429                         
430                         //remove names for outputFileNames, just cleans up the output
431                         vector<string> outputNames2;
432                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
433                         outputNames = outputNames2;
434                         
435                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
436                                 ifstream in;
437                                 m->openInputFile(it->first, in);
438                                 
439                                 ofstream out;
440                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
441                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
442                                 m->openOutputFile(thisGroupName, out);
443                                 
444                                 while (!in.eof()){
445                                         if (m->control_pressed) { break; }
446                                         
447                                         Sequence currSeq(in); m->gobble(in);
448                                         out << currSeq.getName() << '\t' << it->second << endl;
449                                 }
450                                 in.close();
451                                 out.close();
452                         }
453                 }
454                 
455                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
456
457                 //output group counts
458                 m->mothurOutEndLine();
459                 int total = 0;
460                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
461                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
462                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
463                 }
464                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
465                 
466                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
467
468                 //set fasta file as new current fastafile
469                 string current = "";
470                 itTypes = outputTypes.find("fasta");
471                 if (itTypes != outputTypes.end()) {
472                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
473                 }
474                 
475                 itTypes = outputTypes.find("name");
476                 if (itTypes != outputTypes.end()) {
477                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
478                 }
479                 
480                 itTypes = outputTypes.find("qfile");
481                 if (itTypes != outputTypes.end()) {
482                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
483                 }
484                 
485                 itTypes = outputTypes.find("group");
486                 if (itTypes != outputTypes.end()) {
487                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
488                 }
489
490                 m->mothurOutEndLine();
491                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
492                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
493                 m->mothurOutEndLine();
494                 
495                 return 0;       
496                         
497         }
498         catch(exception& e) {
499                 m->errorOut(e, "TrimSeqsCommand", "execute");
500                 exit(1);
501         }
502 }
503                 
504 /**************************************************************************************/
505
506 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
507                 
508         try {
509                 
510                 ofstream trimFASTAFile;
511                 m->openOutputFile(trimFileName, trimFASTAFile);
512                 
513                 ofstream scrapFASTAFile;
514                 m->openOutputFile(scrapFileName, scrapFASTAFile);
515                 
516                 ofstream trimQualFile;
517                 ofstream scrapQualFile;
518                 if(qFileName != ""){
519                         m->openOutputFile(trimQFileName, trimQualFile);
520                         m->openOutputFile(scrapQFileName, scrapQualFile);
521                 }
522                 
523                 ofstream trimNameFile;
524                 ofstream scrapNameFile;
525                 if(nameFile != ""){
526                         m->openOutputFile(trimNFileName, trimNameFile);
527                         m->openOutputFile(scrapNFileName, scrapNameFile);
528                 }
529                 
530                 
531                 ofstream outGroupsFile;
532                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
533                 if(allFiles){
534                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
535                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
536                                         if (fastaFileNames[i][j] != "") {
537                                                 ofstream temp;
538                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
539                                                 if(qFileName != ""){
540                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
541                                                 }
542                                                 
543                                                 if(nameFile != ""){
544                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
545                                                 }
546                                         }
547                                 }
548                         }
549                 }
550                 
551                 ifstream inFASTA;
552                 m->openInputFile(filename, inFASTA);
553                 inFASTA.seekg(line->start);
554                 
555                 ifstream qFile;
556                 if(qFileName != "")     {
557                         m->openInputFile(qFileName, qFile);
558                         qFile.seekg(qline->start);  
559                 }
560                 
561                 int count = 0;
562                 bool moreSeqs = 1;
563                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
564         
565                 while (moreSeqs) {
566                                 
567                         if (m->control_pressed) { 
568                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
569                                 if (createGroup) {       outGroupsFile.close();   }
570
571                                 if(qFileName != ""){
572                                         qFile.close();
573                                 }
574                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
575
576                                 return 0;
577                         }
578                         
579                         int success = 1;
580                         string trashCode = "";
581                         int currentSeqsDiffs = 0;
582
583                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
584                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
585                         QualityScores currQual;
586                         if(qFileName != ""){
587                                 currQual = QualityScores(qFile);  m->gobble(qFile);
588                         }
589                         
590                         string origSeq = currSeq.getUnaligned();
591                         if (origSeq != "") {
592                                 
593                                 int barcodeIndex = 0;
594                                 int primerIndex = 0;
595                                 
596                 if(numLinkers != 0){
597                                         success = trimOligos.stripLinker(currSeq, currQual);
598                                         if(!success)                            {       trashCode += 'k';       }
599                                 }
600                 
601                                 if(barcodes.size() != 0){
602                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
603                                         if(success > bdiffs)            {       trashCode += 'b';       }
604                                         else{ currentSeqsDiffs += success;  }
605                                 }
606                                 
607                 if(numSpacers != 0){
608                                         success = trimOligos.stripSpacer(currSeq, currQual);
609                                         if(!success)                            {       trashCode += 's';       }
610                                 }
611                 
612                                 if(numFPrimers != 0){
613                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
614                                         if(success > pdiffs)            {       trashCode += 'f';       }
615                                         else{ currentSeqsDiffs += success;  }
616                                 }
617                                 
618                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
619                                 
620                                 if(numRPrimers != 0){
621                                         success = trimOligos.stripReverse(currSeq, currQual);
622                                         if(!success)                            {       trashCode += 'r';       }
623                                 }
624
625                                 if(keepFirst != 0){
626                                         success = keepFirstTrim(currSeq, currQual);
627                                 }
628                                 
629                                 if(removeLast != 0){
630                                         success = removeLastTrim(currSeq, currQual);
631                                         if(!success)                            {       trashCode += 'l';       }
632                                 }
633
634                                 
635                                 if(qFileName != ""){
636                                         int origLength = currSeq.getNumBases();
637                                         
638                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
639                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
640                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
641                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
642                                         else                                            {       success = 1;                            }
643                                         
644                                         //you don't want to trim, if it fails above then scrap it
645                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
646                                         
647                                         if(!success)                            {       trashCode += 'q';       }
648                                 }                               
649                 
650                                 if(minLength > 0 || maxLength > 0){
651                                         success = cullLength(currSeq);
652                                         if(!success)                            {       trashCode += 'l';       }
653                                 }
654                                 if(maxHomoP > 0){
655                                         success = cullHomoP(currSeq);
656                                         if(!success)                            {       trashCode += 'h';       }
657                                 }
658                                 if(maxAmbig != -1){
659                                         success = cullAmbigs(currSeq);
660                                         if(!success)                            {       trashCode += 'n';       }
661                                 }
662                                 
663                                 if(flip){               // should go last                       
664                                         currSeq.reverseComplement();
665                                         if(qFileName != ""){
666                                                 currQual.flipQScores(); 
667                                         }
668                                 }
669                                 
670                                 if(trashCode.length() == 0){
671                                         currSeq.setAligned(currSeq.getUnaligned());
672                                         currSeq.printSequence(trimFASTAFile);
673                                         
674                                         if(qFileName != ""){
675                                                 currQual.printQScores(trimQualFile);
676                                         }
677                                         
678                                         if(nameFile != ""){
679                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
680                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
681                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
682                                         }
683                                         
684                                         if (createGroup) {
685                                                 if(barcodes.size() != 0){
686                                                         string thisGroup = barcodeNameVector[barcodeIndex];
687                                                         if (primers.size() != 0) { 
688                                                                 if (primerNameVector[primerIndex] != "") { 
689                                                                         if(thisGroup != "") {
690                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
691                                                                         }else {
692                                                                                 thisGroup = primerNameVector[primerIndex]; 
693                                                                         }
694                                                                 } 
695                                                         }
696                                                         
697                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
698                                                         
699                                                         if (nameFile != "") {
700                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
701                                                                 if (itName != nameMap.end()) { 
702                                                                         vector<string> thisSeqsNames; 
703                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
704                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
705                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
706                                                                         }
707                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
708                                                         }
709                                                         
710                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
711                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
712                                                         else { groupCounts[it->first]++; }
713                                                                 
714                                                 }
715                                         }
716                                         
717                                         if(allFiles){
718                                                 ofstream output;
719                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
720                                                 currSeq.printSequence(output);
721                                                 output.close();
722                                                 
723                                                 if(qFileName != ""){
724                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
725                                                         currQual.printQScores(output);
726                                                         output.close();                                                 
727                                                 }
728                                                 
729                                                 if(nameFile != ""){
730                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
731                                                         if (itName != nameMap.end()) { 
732                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
733                                                                 output << itName->first << '\t' << itName->second << endl; 
734                                                                 output.close();
735                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
736                                                 }
737                                         }
738                                 }
739                                 else{
740                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
741                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
742                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
743                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
744                                         }
745                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
746                                         currSeq.setUnaligned(origSeq);
747                                         currSeq.setAligned(origSeq);
748                                         currSeq.printSequence(scrapFASTAFile);
749                                         if(qFileName != ""){
750                                                 currQual.printQScores(scrapQualFile);
751                                         }
752                                 }
753                                 count++;
754                         }
755                         
756                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
757                                 unsigned long long pos = inFASTA.tellg();
758                                 if ((pos == -1) || (pos >= line->end)) { break; }
759                         
760                         #else
761                                 if (inFASTA.eof()) { break; }
762                         #endif
763                         
764                         //report progress
765                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
766                         
767                 }
768                 //report progress
769                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
770                 
771                 
772                 inFASTA.close();
773                 trimFASTAFile.close();
774                 scrapFASTAFile.close();
775                 if (createGroup) {       outGroupsFile.close();   }
776                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
777                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
778                 
779                 return count;
780         }
781         catch(exception& e) {
782                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
783                 exit(1);
784         }
785 }
786
787 /**************************************************************************************************/
788
789 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
790         try {
791 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
792                 int process = 1;
793                 int exitCommand = 1;
794                 processIDS.clear();
795                 
796                 //loop through and create all the processes you want
797                 while (process != processors) {
798                         int pid = fork();
799                         
800                         if (pid > 0) {
801                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
802                                 process++;
803                         }else if (pid == 0){
804                                 
805                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
806                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
807                                 vector<vector<string> > tempNameFileNames = nameFileNames;
808
809                                 if(allFiles){
810                                         ofstream temp;
811
812                                         for(int i=0;i<tempFASTAFileNames.size();i++){
813                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
814                                                         if (tempFASTAFileNames[i][j] != "") {
815                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
816                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
817
818                                                                 if(qFileName != ""){
819                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
820                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
821                                                                 }
822                                                                 if(nameFile != ""){
823                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
824                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
825                                                                 }
826                                                         }
827                                                 }
828                                         }
829                                 }
830                                                         
831                                 driverCreateTrim(filename,
832                                                                  qFileName,
833                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
834                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
835                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
836                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
837                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
838                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
839                                                                  (groupFile + toString(getpid()) + ".temp"),
840                                                                  tempFASTAFileNames,
841                                                                  tempPrimerQualFileNames,
842                                                                  tempNameFileNames,
843                                                                  lines[process],
844                                                                  qLines[process]);
845                                 
846                                 //pass groupCounts to parent
847                                 if(createGroup){
848                                         ofstream out;
849                                         string tempFile = filename + toString(getpid()) + ".num.temp";
850                                         m->openOutputFile(tempFile, out);
851                                         
852                                         out << groupCounts.size() << endl;
853                                         
854                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
855                                                 out << it->first << '\t' << it->second << endl;
856                                         }
857                                         out.close();
858                                 }
859                                 exit(0);
860                         }else { 
861                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
862                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
863                                 exit(0);
864                         }
865                 }
866                 
867                 //parent do my part
868                 ofstream temp;
869                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
870                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
871                 if(qFileName != ""){
872                         m->openOutputFile(trimQualFileName, temp);              temp.close();
873                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
874                 }
875                 if (nameFile != "") {
876                         m->openOutputFile(trimNameFileName, temp);              temp.close();
877                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
878                 }
879
880                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
881                 
882                 //force parent to wait until all the processes are done
883                 for (int i=0;i<processIDS.size();i++) { 
884                         int temp = processIDS[i];
885                         wait(&temp);
886                 }
887                 
888                 //append files
889                 for(int i=0;i<processIDS.size();i++){
890                         
891                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
892                         
893                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
894                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
895                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
896                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
897                         
898                         if(qFileName != ""){
899                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
900                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
901                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
902                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
903                         }
904                         
905                         if(nameFile != ""){
906                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
907                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
908                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
909                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
910                         }
911                         
912                         if(createGroup){
913                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
914                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
915                         }
916                         
917                         
918                         if(allFiles){
919                                 for(int j=0;j<fastaFileNames.size();j++){
920                                         for(int k=0;k<fastaFileNames[j].size();k++){
921                                                 if (fastaFileNames[j][k] != "") {
922                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
923                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
924                                                         
925                                                         if(qFileName != ""){
926                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
927                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
928                                                         }
929                                                         
930                                                         if(nameFile != ""){
931                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
932                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
933                                                         }
934                                                 }
935                                         }
936                                 }
937                         }
938                         
939                         if(createGroup){
940                                 ifstream in;
941                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
942                                 m->openInputFile(tempFile, in);
943                                 int tempNum;
944                                 string group;
945                                 
946                                 in >> tempNum; m->gobble(in);
947                                 
948                                 if (tempNum != 0) {
949                                         while (!in.eof()) { 
950                                                 in >> group >> tempNum; m->gobble(in);
951                                 
952                                                 map<string, int>::iterator it = groupCounts.find(group);
953                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
954                                                 else { groupCounts[it->first] += tempNum; }
955                                         }
956                                 }
957                                 in.close(); m->mothurRemove(tempFile);
958                         }
959                         
960                 }
961         
962                 return exitCommand;
963 #endif          
964         }
965         catch(exception& e) {
966                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
967                 exit(1);
968         }
969 }
970
971 /**************************************************************************************************/
972
973 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
974         try {
975                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
976                 //set file positions for fasta file
977                 fastaFilePos = m->divideFile(filename, processors);
978                 
979                 if (qfilename == "") { return processors; }
980                 
981                 //get name of first sequence in each chunk
982                 map<string, int> firstSeqNames;
983                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
984                         ifstream in;
985                         m->openInputFile(filename, in);
986                         in.seekg(fastaFilePos[i]);
987                 
988                         Sequence temp(in); 
989                         firstSeqNames[temp.getName()] = i;
990                 
991                         in.close();
992                 }
993                                 
994                 //seach for filePos of each first name in the qfile and save in qfileFilePos
995                 ifstream inQual;
996                 m->openInputFile(qfilename, inQual);
997                 
998                 string input;
999                 while(!inQual.eof()){   
1000                         input = m->getline(inQual);
1001
1002                         if (input.length() != 0) {
1003                                 if(input[0] == '>'){ //this is a sequence name line
1004                                         istringstream nameStream(input);
1005                                         
1006                                         string sname = "";  nameStream >> sname;
1007                                         sname = sname.substr(1);
1008                                         
1009                                         map<string, int>::iterator it = firstSeqNames.find(sname);
1010                                         
1011                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1012                                                 unsigned long long pos = inQual.tellg(); 
1013                                                 qfileFilePos.push_back(pos - input.length() - 1);       
1014                                                 firstSeqNames.erase(it);
1015                                         }
1016                                 }
1017                         }
1018                         
1019                         if (firstSeqNames.size() == 0) { break; }
1020                 }
1021                 inQual.close();
1022                 
1023                 
1024                 if (firstSeqNames.size() != 0) { 
1025                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1026                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1027                         }
1028                         qFileName = "";
1029                         return processors;
1030                 }
1031
1032                 //get last file position of qfile
1033                 FILE * pFile;
1034                 unsigned long long size;
1035                 
1036                 //get num bytes in file
1037                 pFile = fopen (qfilename.c_str(),"rb");
1038                 if (pFile==NULL) perror ("Error opening file");
1039                 else{
1040                         fseek (pFile, 0, SEEK_END);
1041                         size=ftell (pFile);
1042                         fclose (pFile);
1043                 }
1044                 
1045                 qfileFilePos.push_back(size);
1046                 
1047                 return processors;
1048                 
1049                 #else
1050                 
1051                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1052                         fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1053                         return 1;
1054                 
1055                 #endif
1056         }
1057         catch(exception& e) {
1058                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1059                 exit(1);
1060         }
1061 }
1062
1063 //***************************************************************************************************************
1064
1065 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1066         try {
1067                 ifstream inOligos;
1068                 m->openInputFile(oligoFile, inOligos);
1069                 
1070                 ofstream test;
1071                 
1072                 string type, oligo, group;
1073
1074                 int indexPrimer = 0;
1075                 int indexBarcode = 0;
1076                 
1077                 while(!inOligos.eof()){
1078
1079                         inOligos >> type; 
1080                                         
1081                         if(type[0] == '#'){
1082                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1083                                 m->gobble(inOligos);
1084                         }
1085                         else{
1086                                 m->gobble(inOligos);
1087                                 //make type case insensitive
1088                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1089                                 
1090                                 inOligos >> oligo;
1091                                 
1092                                 for(int i=0;i<oligo.length();i++){
1093                                         oligo[i] = toupper(oligo[i]);
1094                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1095                                 }
1096                                 
1097                                 if(type == "FORWARD"){
1098                                         group = "";
1099                                         
1100                                         // get rest of line in case there is a primer name
1101                                         while (!inOligos.eof()) {       
1102                                                 char c = inOligos.get(); 
1103                                                 if (c == 10 || c == 13){        break;  }
1104                                                 else if (c == 32 || c == 9){;} //space or tab
1105                                                 else {  group += c;  }
1106                                         } 
1107                                         
1108                                         //check for repeat barcodes
1109                                         map<string, int>::iterator itPrime = primers.find(oligo);
1110                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1111                                         
1112                                         primers[oligo]=indexPrimer; indexPrimer++;              
1113                                         primerNameVector.push_back(group);
1114                                 }
1115                                 else if(type == "REVERSE"){
1116                                         Sequence oligoRC("reverse", oligo);
1117                                         oligoRC.reverseComplement();
1118                                         revPrimer.push_back(oligoRC.getUnaligned());
1119                                 }
1120                                 else if(type == "BARCODE"){
1121                                         inOligos >> group;
1122                                         
1123                                         //check for repeat barcodes
1124                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1125                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1126                                                 
1127                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1128                                         barcodeNameVector.push_back(group);
1129                                 }else if(type == "LINKER"){
1130                                         linker.push_back(oligo);
1131                                 }else if(type == "SPACER"){
1132                                         spacer.push_back(oligo);
1133                                 }
1134                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1135                         }
1136                         m->gobble(inOligos);
1137                 }       
1138                 inOligos.close();
1139                 
1140                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1141                 
1142                 //add in potential combos
1143                 if(barcodeNameVector.size() == 0){
1144                         barcodes[""] = 0;
1145                         barcodeNameVector.push_back("");                        
1146                 }
1147                 
1148                 if(primerNameVector.size() == 0){
1149                         primers[""] = 0;
1150                         primerNameVector.push_back("");                 
1151                 }
1152                 
1153                 fastaFileNames.resize(barcodeNameVector.size());
1154                 for(int i=0;i<fastaFileNames.size();i++){
1155                         fastaFileNames[i].assign(primerNameVector.size(), "");
1156                 }
1157                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1158                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1159                 
1160                 if(allFiles){
1161                         set<string> uniqueNames; //used to cleanup outputFileNames
1162                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1163                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1164                                         
1165                                         string primerName = primerNameVector[itPrimer->second];
1166                                         string barcodeName = barcodeNameVector[itBar->second];
1167                                         
1168                                         string comboGroupName = "";
1169                                         string fastaFileName = "";
1170                                         string qualFileName = "";
1171                                         string nameFileName = "";
1172                                         
1173                                         if(primerName == ""){
1174                                                 comboGroupName = barcodeNameVector[itBar->second];
1175                                         }
1176                                         else{
1177                                                 if(barcodeName == ""){
1178                                                         comboGroupName = primerNameVector[itPrimer->second];
1179                                                 }
1180                                                 else{
1181                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1182                                                 }
1183                                         }
1184                                         
1185                                         
1186                                         ofstream temp;
1187                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1188                                         if (uniqueNames.count(fastaFileName) == 0) {
1189                                                 outputNames.push_back(fastaFileName);
1190                                                 outputTypes["fasta"].push_back(fastaFileName);
1191                                                 uniqueNames.insert(fastaFileName);
1192                                         }
1193                                         
1194                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1195                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1196                                         
1197                                         if(qFileName != ""){
1198                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1199                                                 if (uniqueNames.count(qualFileName) == 0) {
1200                                                         outputNames.push_back(qualFileName);
1201                                                         outputTypes["qfile"].push_back(qualFileName);
1202                                                 }
1203                                                 
1204                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1205                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1206                                         }
1207                                         
1208                                         if(nameFile != ""){
1209                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1210                                                 if (uniqueNames.count(nameFileName) == 0) {
1211                                                         outputNames.push_back(nameFileName);
1212                                                         outputTypes["name"].push_back(nameFileName);
1213                                                 }
1214                                                 
1215                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1216                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1217                                         }
1218                                         
1219                                 }
1220                         }
1221                 }
1222                 numFPrimers = primers.size();
1223                 numRPrimers = revPrimer.size();
1224         numLinkers = linker.size();
1225         numSpacers = spacer.size();
1226                 
1227                 bool allBlank = true;
1228                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1229                         if (barcodeNameVector[i] != "") {
1230                                 allBlank = false;
1231                                 break;
1232                         }
1233                 }
1234                 for (int i = 0; i < primerNameVector.size(); i++) {
1235                         if (primerNameVector[i] != "") {
1236                                 allBlank = false;
1237                                 break;
1238                         }
1239                 }
1240                 
1241                 if (allBlank) {
1242                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1243                         allFiles = false;
1244                         return false;
1245                 }
1246                 
1247                 return true;
1248                 
1249         }
1250         catch(exception& e) {
1251                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1252                 exit(1);
1253         }
1254 }
1255 //***************************************************************************************************************
1256
1257 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1258         try {
1259                 bool success = 1;
1260                 if(qscores.getName() != ""){
1261                         qscores.trimQScores(-1, keepFirst);
1262                 }
1263                 sequence.trim(keepFirst);
1264                 return success;
1265         }
1266         catch(exception& e) {
1267                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1268                 exit(1);
1269         }
1270         
1271 }       
1272
1273 //***************************************************************************************************************
1274
1275 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1276         try {
1277                 bool success = 0;
1278                 
1279                 int length = sequence.getNumBases() - removeLast;
1280                 
1281                 if(length > 0){
1282                         if(qscores.getName() != ""){
1283                                 qscores.trimQScores(-1, length);
1284                         }
1285                         sequence.trim(length);
1286                         success = 1;
1287                 }
1288                 else{
1289                         success = 0;
1290                 }
1291
1292                 return success;
1293         }
1294         catch(exception& e) {
1295                 m->errorOut(e, "removeLastTrim", "countDiffs");
1296                 exit(1);
1297         }
1298         
1299 }       
1300
1301 //***************************************************************************************************************
1302
1303 bool TrimSeqsCommand::cullLength(Sequence& seq){
1304         try {
1305         
1306                 int length = seq.getNumBases();
1307                 bool success = 0;       //guilty until proven innocent
1308                 
1309                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1310                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1311                 else                                                                                            {       success = 0;    }
1312                 
1313                 return success;
1314         
1315         }
1316         catch(exception& e) {
1317                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1318                 exit(1);
1319         }
1320         
1321 }
1322
1323 //***************************************************************************************************************
1324
1325 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1326         try {
1327                 int longHomoP = seq.getLongHomoPolymer();
1328                 bool success = 0;       //guilty until proven innocent
1329                 
1330                 if(longHomoP <= maxHomoP){      success = 1;    }
1331                 else                                    {       success = 0;    }
1332                 
1333                 return success;
1334         }
1335         catch(exception& e) {
1336                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1337                 exit(1);
1338         }
1339         
1340 }
1341
1342 //***************************************************************************************************************
1343
1344 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1345         try {
1346                 int numNs = seq.getAmbigBases();
1347                 bool success = 0;       //guilty until proven innocent
1348                 
1349                 if(numNs <= maxAmbig)   {       success = 1;    }
1350                 else                                    {       success = 0;    }
1351                 
1352                 return success;
1353         }
1354         catch(exception& e) {
1355                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1356                 exit(1);
1357         }
1358         
1359 }
1360 //***************************************************************************************************************