]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
mods to resolve some misc warnings. added alignPrimer function to needle man to allow...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24         CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
25                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
28                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
29                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
30                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
31         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
32                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
33         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
34                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
35                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
36                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
37                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
38                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
39                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
40                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
41                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
42                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
43                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
44                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
45                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
46                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
47                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
48                         
49                 vector<string> myArray;
50                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
51                 return myArray;
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
55                 exit(1);
56         }
57 }
58 //**********************************************************************************************************************
59 string TrimSeqsCommand::getHelpString(){        
60         try {
61                 string helpString = "";
62                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
63                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
64                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
65                 helpString += "The fasta parameter is required.\n";
66                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
67         helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
68                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
69                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
70         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
71                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
72                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
73                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
74                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
75                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
76                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
77                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
78         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
79                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
80                 helpString += "The qfile parameter allows you to provide a quality file.\n";
81                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
82                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
83                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
84                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
85                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
86                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
87                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
88                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
89                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
90                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
91                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
92                 helpString += "The trim.seqs command should be in the following format: \n";
93                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
94                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
95                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
96                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
97                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
98                 return helpString;
99         }
100         catch(exception& e) {
101                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
102                 exit(1);
103         }
104 }
105 //**********************************************************************************************************************
106 string TrimSeqsCommand::getOutputPattern(string type) {
107     try {
108         string pattern = "";
109         
110         if (type == "qfile") {  pattern = "[filename],[tag],qual"; } 
111         else if (type == "fasta") {  pattern = "[filename],[tag],fasta"; } 
112         else if (type == "group") {  pattern = "[filename],groups"; }
113         else if (type == "name") {  pattern = "[filename],[tag],names"; }
114         else if (type == "count") {  pattern = "[filename],[tag],count_table-[filename],count_table"; }
115         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
116         
117         return pattern;
118     }
119     catch(exception& e) {
120         m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
121         exit(1);
122     }
123 }
124 //**********************************************************************************************************************
125
126 TrimSeqsCommand::TrimSeqsCommand(){     
127         try {
128                 abort = true; calledHelp = true;
129                 setParameters();
130                 vector<string> tempOutNames;
131                 outputTypes["fasta"] = tempOutNames;
132                 outputTypes["qfile"] = tempOutNames;
133                 outputTypes["group"] = tempOutNames;
134                 outputTypes["name"] = tempOutNames;
135         outputTypes["count"] = tempOutNames;
136         }
137         catch(exception& e) {
138                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
139                 exit(1);
140         }
141 }
142 //***************************************************************************************************************
143
144 TrimSeqsCommand::TrimSeqsCommand(string option)  {
145         try {
146                 
147                 abort = false; calledHelp = false;   
148                 comboStarts = 0;
149                 
150                 //allow user to run help
151                 if(option == "help") { help(); abort = true; calledHelp = true; }
152                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
153                 
154                 else {
155                         vector<string> myArray = setParameters();
156                         
157                         OptionParser parser(option);
158                         map<string,string> parameters = parser.getParameters();
159                         
160                         ValidParameters validParameter;
161                         map<string,string>::iterator it;
162                         
163                         //check to make sure all parameters are valid for command
164                         for (it = parameters.begin(); it != parameters.end(); it++) { 
165                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
166                         }
167                         
168                         //initialize outputTypes
169                         vector<string> tempOutNames;
170                         outputTypes["fasta"] = tempOutNames;
171                         outputTypes["qfile"] = tempOutNames;
172                         outputTypes["group"] = tempOutNames;
173                         outputTypes["name"] = tempOutNames;
174             outputTypes["count"] = tempOutNames;
175                         
176                         //if the user changes the input directory command factory will send this info to us in the output parameter 
177                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
178                         if (inputDir == "not found"){   inputDir = "";          }
179                         else {
180                                 string path;
181                                 it = parameters.find("fasta");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
187                                 }
188                                 
189                                 it = parameters.find("oligos");
190                                 //user has given a template file
191                                 if(it != parameters.end()){ 
192                                         path = m->hasPath(it->second);
193                                         //if the user has not given a path then, add inputdir. else leave path alone.
194                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
195                                 }
196                                 
197                                 it = parameters.find("qfile");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
203                                 }
204                                 
205                                 it = parameters.find("name");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
211                                 }
212                 
213                 it = parameters.find("count");
214                                 //user has given a template file
215                                 if(it != parameters.end()){ 
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
219                                 }
220                                 
221                         }
222
223                         
224                         //check for required parameters
225                         fastaFile = validParameter.validFile(parameters, "fasta", true);
226                         if (fastaFile == "not found") {                                 
227                                 fastaFile = m->getFastaFile(); 
228                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
229                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
230                         }else if (fastaFile == "not open") { abort = true; }    
231                         else { m->setFastaFile(fastaFile); }
232                         
233                         //if the user changes the output directory command factory will send this info to us in the output parameter 
234                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
235                                 outputDir = ""; 
236                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
237                         }
238                 
239                         
240                         //check for optional parameter and set defaults
241                         // ...at some point should added some additional type checking...
242                         string temp;
243                         temp = validParameter.validFile(parameters, "flip", false);
244                         if (temp == "not found")    {   flip = 0;       }
245                         else {  flip = m->isTrue(temp);         }
246                 
247                         temp = validParameter.validFile(parameters, "oligos", true);
248                         if (temp == "not found"){       oligoFile = "";         }
249                         else if(temp == "not open"){    abort = true;   } 
250                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
251                         
252                         
253                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
254                         m->mothurConvert(temp, maxAmbig);  
255
256                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
257                         m->mothurConvert(temp, maxHomoP);  
258
259                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
260                         m->mothurConvert(temp, minLength); 
261                         
262                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
263                         m->mothurConvert(temp, maxLength);
264                         
265                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
266                         m->mothurConvert(temp, bdiffs);
267                         
268                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
269                         m->mothurConvert(temp, pdiffs);
270             
271             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
272                         m->mothurConvert(temp, ldiffs);
273             
274             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
275                         m->mothurConvert(temp, sdiffs);
276                         
277                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
278                         m->mothurConvert(temp, tdiffs);
279                         
280                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
281                         
282                         temp = validParameter.validFile(parameters, "qfile", true);     
283                         if (temp == "not found")        {       qFileName = "";         }
284                         else if(temp == "not open")     {       abort = true;           }
285                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
286                         
287                         temp = validParameter.validFile(parameters, "name", true);      
288                         if (temp == "not found")        {       nameFile = "";          }
289                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
290                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
291             
292             countfile = validParameter.validFile(parameters, "count", true);
293                         if (countfile == "not open") { abort = true; countfile = ""; }  
294                         else if (countfile == "not found") { countfile = ""; }
295                         else { m->setCountTableFile(countfile); }
296                         
297             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
298                         
299                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
300                         m->mothurConvert(temp, qThreshold);
301                         
302                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
303                         qtrim = m->isTrue(temp);
304
305                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
306                         convert(temp, qRollAverage);
307
308                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
309                         convert(temp, qWindowAverage);
310
311                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
312                         convert(temp, qWindowSize);
313
314                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
315                         convert(temp, qWindowStep);
316
317                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
318                         convert(temp, qAverage);
319
320                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
321                         convert(temp, keepFirst);
322
323                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
324                         convert(temp, removeLast);
325                         
326                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
327                         allFiles = m->isTrue(temp);
328             
329             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
330                         keepforward = m->isTrue(temp);
331             
332             temp = validParameter.validFile(parameters, "checkorient", false);          if (temp == "not found") { temp = "F"; }
333                         reorient = m->isTrue(temp);
334                         
335                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
336                         m->setProcessors(temp);
337                         m->mothurConvert(temp, processors); 
338                         
339                         
340                         if(allFiles && (oligoFile == "")){
341                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
342                         }
343                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
344                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
345                                 qAverage=0;
346                                 qThreshold=0;
347                         }
348                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
349                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
350                                 abort = true;
351                         }
352                         
353             if (countfile == "") {
354                 if (nameFile == "") {
355                     vector<string> files; files.push_back(fastaFile);
356                     parser.getNameFile(files);
357                 }
358             }
359                 }
360
361         }
362         catch(exception& e) {
363                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
364                 exit(1);
365         }
366 }
367 //***************************************************************************************************************
368
369 int TrimSeqsCommand::execute(){
370         try{
371         
372                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
373                 
374         pairedOligos = false;
375                 numFPrimers = 0;  //this needs to be initialized
376                 numRPrimers = 0;
377         numSpacers = 0;
378         numLinkers = 0;
379                 createGroup = false;
380                 vector<vector<string> > fastaFileNames;
381                 vector<vector<string> > qualFileNames;
382                 vector<vector<string> > nameFileNames;
383                 
384         map<string, string> variables; 
385                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
386         variables["[tag]"] = "trim";
387                 string trimSeqFile = getOutputFileName("fasta",variables);
388         string trimQualFile = getOutputFileName("qfile",variables);
389                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
390         
391         variables["[tag]"] = "scrap";
392                 string scrapSeqFile = getOutputFileName("fasta",variables);
393         string scrapQualFile = getOutputFileName("qfile",variables);
394                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
395                 
396                 if (qFileName != "") {
397                         outputNames.push_back(trimQualFile);
398                         outputNames.push_back(scrapQualFile);
399                         outputTypes["qfile"].push_back(trimQualFile);
400                         outputTypes["qfile"].push_back(scrapQualFile); 
401                 }
402                 
403         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
404         variables["[tag]"] = "trim";
405                 string trimNameFile = getOutputFileName("name",variables);
406         variables["[tag]"] = "scrap";
407                 string scrapNameFile = getOutputFileName("name",variables);
408                 
409                 if (nameFile != "") {
410                         m->readNames(nameFile, nameMap);
411                         outputNames.push_back(trimNameFile);
412                         outputNames.push_back(scrapNameFile);
413                         outputTypes["name"].push_back(trimNameFile);
414                         outputTypes["name"].push_back(scrapNameFile); 
415                 }
416         
417         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
418         variables["[tag]"] = "trim";
419         string trimCountFile = getOutputFileName("count",variables);
420         variables["[tag]"] = "scrap";
421                 string scrapCountFile = getOutputFileName("count",variables);
422                 
423                 if (countfile != "") {
424             CountTable ct;
425             ct.readTable(countfile);
426             nameCount = ct.getNameMap();
427                         outputNames.push_back(trimCountFile);
428                         outputNames.push_back(scrapCountFile);
429                         outputTypes["count"].push_back(trimCountFile);
430                         outputTypes["count"].push_back(scrapCountFile); 
431                 }
432
433                 
434                 if (m->control_pressed) { return 0; }
435                 
436                 string outputGroupFileName;
437                 if(oligoFile != ""){
438                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
439                         if ((createGroup) && (countfile == "")){
440                 map<string, string> myvariables; 
441                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
442                                 outputGroupFileName = getOutputFileName("group",myvariables);
443                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
444                         }
445                 }
446        
447         if (m->control_pressed) { return 0; }
448             
449         //fills lines and qlines
450                 setLines(fastaFile, qFileName);
451                 
452         if(processors == 1){
453             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
454         }else{
455             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
456         }       
457                 
458                 
459                 if (m->control_pressed) {  return 0; }                  
460         
461                 if(allFiles){
462                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
463                         map<string, string>::iterator it;
464                         set<string> namesToRemove;
465                         for(int i=0;i<fastaFileNames.size();i++){
466                                 for(int j=0;j<fastaFileNames[0].size();j++){
467                                         if (fastaFileNames[i][j] != "") {
468                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
469                                                         if(m->isBlank(fastaFileNames[i][j])){
470                                                                 m->mothurRemove(fastaFileNames[i][j]);
471                                                                 namesToRemove.insert(fastaFileNames[i][j]);
472                                                         
473                                                                 if(qFileName != ""){
474                                                                         m->mothurRemove(qualFileNames[i][j]);
475                                                                         namesToRemove.insert(qualFileNames[i][j]);
476                                                                 }
477                                                                 
478                                                                 if(nameFile != ""){
479                                                                         m->mothurRemove(nameFileNames[i][j]);
480                                                                         namesToRemove.insert(nameFileNames[i][j]);
481                                                                 }
482                                                         }else{  
483                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
484                                                                 if (it == uniqueFastaNames.end()) {     
485                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
486                                                                 }       
487                                                         }
488                                                 }
489                                         }
490                                 }
491                         }
492                         
493                         //remove names for outputFileNames, just cleans up the output
494                         vector<string> outputNames2;
495                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
496                         outputNames = outputNames2;
497                         
498             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
499                 ifstream in;
500                 m->openInputFile(it->first, in);
501                 
502                 ofstream out;
503                 map<string, string> myvariables; 
504                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
505                 string thisGroupName = "";
506                 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
507                 else {  thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
508                 m->openOutputFile(thisGroupName, out);
509                 
510                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
511                 
512                 while (!in.eof()){
513                     if (m->control_pressed) { break; }
514                     
515                     Sequence currSeq(in); m->gobble(in);
516                     if (countfile == "") {  
517                         out << currSeq.getName() << '\t' << it->second << endl;  
518                         
519                         if (nameFile != "") {
520                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
521                             if (itName != nameMap.end()) { 
522                                 vector<string> thisSeqsNames; 
523                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
524                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
525                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
526                                 }
527                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
528                         }
529                     }else { 
530                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
531                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
532                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
533                     }
534                 }
535                 in.close();
536                 out.close();
537             }
538             
539             if (countfile != "") { //create countfile with group info included
540                 CountTable* ct = new CountTable();
541                 ct->readTable(trimCountFile);
542                 map<string, int> justTrimmedNames = ct->getNameMap();
543                 delete ct;
544                 
545                 CountTable newCt;
546                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
547                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
548                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
549                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
550                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
551                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
552                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
553                 }
554                 newCt.printTable(trimCountFile);
555             }
556                 }
557                 
558                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
559
560                 //output group counts
561                 m->mothurOutEndLine();
562                 int total = 0;
563                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
564                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
565                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
566                 }
567                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
568                 
569                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
570
571                 //set fasta file as new current fastafile
572                 string current = "";
573                 itTypes = outputTypes.find("fasta");
574                 if (itTypes != outputTypes.end()) {
575                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
576                 }
577                 
578                 itTypes = outputTypes.find("name");
579                 if (itTypes != outputTypes.end()) {
580                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
581                 }
582                 
583                 itTypes = outputTypes.find("qfile");
584                 if (itTypes != outputTypes.end()) {
585                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
586                 }
587                 
588                 itTypes = outputTypes.find("group");
589                 if (itTypes != outputTypes.end()) {
590                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
591                 }
592         
593         itTypes = outputTypes.find("count");
594                 if (itTypes != outputTypes.end()) {
595                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
596                 }
597
598                 m->mothurOutEndLine();
599                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
600                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
601                 m->mothurOutEndLine();
602                 
603                 return 0;       
604                         
605         }
606         catch(exception& e) {
607                 m->errorOut(e, "TrimSeqsCommand", "execute");
608                 exit(1);
609         }
610 }
611                 
612 /**************************************************************************************/
613 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
614                 
615         try {
616                 
617                 ofstream trimFASTAFile;
618                 m->openOutputFile(trimFileName, trimFASTAFile);
619                 
620                 ofstream scrapFASTAFile;
621                 m->openOutputFile(scrapFileName, scrapFASTAFile);
622                 
623                 ofstream trimQualFile;
624                 ofstream scrapQualFile;
625                 if(qFileName != ""){
626                         m->openOutputFile(trimQFileName, trimQualFile);
627                         m->openOutputFile(scrapQFileName, scrapQualFile);
628                 }
629                 
630                 ofstream trimNameFile;
631                 ofstream scrapNameFile;
632                 if(nameFile != ""){
633                         m->openOutputFile(trimNFileName, trimNameFile);
634                         m->openOutputFile(scrapNFileName, scrapNameFile);
635                 }
636                 
637         ofstream trimCountFile;
638                 ofstream scrapCountFile;
639                 if(countfile != ""){
640                         m->openOutputFile(trimCFileName, trimCountFile);
641                         m->openOutputFile(scrapCFileName, scrapCountFile);
642             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
643                 }
644                 
645                 ofstream outGroupsFile;
646                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
647                 if(allFiles){
648                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
649                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
650                                         if (fastaFileNames[i][j] != "") {
651                                                 ofstream temp;
652                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
653                                                 if(qFileName != ""){
654                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
655                                                 }
656                                                 
657                                                 if(nameFile != ""){
658                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
659                                                 }
660                                         }
661                                 }
662                         }
663                 }
664                 
665                 ifstream inFASTA;
666                 m->openInputFile(filename, inFASTA);
667                 inFASTA.seekg(line.start);
668                 
669                 ifstream qFile;
670                 if(qFileName != "")     {
671                         m->openInputFile(qFileName, qFile);
672                         qFile.seekg(qline.start);  
673                 }
674                 
675                 int count = 0;
676                 bool moreSeqs = 1;
677         int numBarcodes = barcodes.size();
678                 TrimOligos* trimOligos = NULL;
679         if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
680         else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
681         
682         TrimOligos* rtrimOligos = NULL;
683         if (reorient) {
684             //create reoriented primer and barcode pairs
685             map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
686             for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
687                 cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
688                 cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
689                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
690                 rpairedPrimers[it->first] = tempPair;
691             }
692             for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
693                 cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
694                 cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
695                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
696                 rpairedBarcodes[it->first] = tempPair;
697             }
698             rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
699         }
700         
701                 while (moreSeqs) {
702                                 
703                         if (m->control_pressed) {
704                 delete trimOligos; if (reorient) { delete rtrimOligos; }
705                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
706                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
707                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
708                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
709                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
710                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
711                         }
712                         
713                         int success = 1;
714                         string trashCode = "";
715                         int currentSeqsDiffs = 0;
716
717                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
718                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
719             
720                         QualityScores currQual;
721                         if(qFileName != ""){
722                                 currQual = QualityScores(qFile);  m->gobble(qFile);
723                 //cout << currQual.getName() << endl;
724                         }
725                         
726                         string origSeq = currSeq.getUnaligned();
727                         if (origSeq != "") {
728                                 
729                                 int barcodeIndex = 0;
730                                 int primerIndex = 0;
731                                 
732                 if(numLinkers != 0){
733                                         success = trimOligos->stripLinker(currSeq, currQual);
734                                         if(success > ldiffs)            {       trashCode += 'k';       }
735                                         else{ currentSeqsDiffs += success;  }
736
737                                 }
738                 
739                                 if(numBarcodes != 0){
740                                         success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
741                                         if(success > bdiffs)            {       trashCode += 'b';       }
742                                         else{ currentSeqsDiffs += success;  }
743                                 }
744                                 
745                 if(numSpacers != 0){
746                                         success = trimOligos->stripSpacer(currSeq, currQual);
747                                         if(success > sdiffs)            {       trashCode += 's';       }
748                                         else{ currentSeqsDiffs += success;  }
749
750                                 }
751                 
752                                 if(numFPrimers != 0){
753                                         success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
754                                         if(success > pdiffs)            {       trashCode += 'f';       }
755                                         else{ currentSeqsDiffs += success;  }
756                                 }
757                                 
758                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
759                                 
760                                 if(numRPrimers != 0){
761                                         success = trimOligos->stripReverse(currSeq, currQual);
762                                         if(!success)                            {       trashCode += 'r';       }
763                                 }
764                 
765                 if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
766                     int thisSuccess = 0;
767                     string thisTrashCode = "";
768                     int thisCurrentSeqsDiffs = 0;
769                     
770                     int thisBarcodeIndex = 0;
771                     int thisPrimerIndex = 0;
772                     
773                     if(numBarcodes != 0){
774                         thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
775                         if(thisSuccess > bdiffs)                {       thisTrashCode += 'b';   }
776                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
777                     }
778                     
779                     if(numFPrimers != 0){
780                         thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
781                         if(thisSuccess > pdiffs)                {       thisTrashCode += 'f';   }
782                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
783                     }
784                     
785                     if (thisCurrentSeqsDiffs > tdiffs)  {       thisTrashCode += 't';   }
786                     
787                     if (thisTrashCode == "") { 
788                         trashCode = thisTrashCode;
789                         success = thisSuccess;
790                         currentSeqsDiffs = thisCurrentSeqsDiffs;
791                         barcodeIndex = thisBarcodeIndex;
792                         primerIndex = thisPrimerIndex;
793                         currSeq.reverseComplement();
794                         if(qFileName != ""){
795                             currQual.flipQScores();
796                         }
797                     }
798                 }
799                 
800                                 if(keepFirst != 0){
801                                         success = keepFirstTrim(currSeq, currQual);
802                                 }
803                                 
804                                 if(removeLast != 0){
805                                         success = removeLastTrim(currSeq, currQual);
806                                         if(!success)                            {       trashCode += 'l';       }
807                                 }
808
809                                 
810                                 if(qFileName != ""){
811                                         int origLength = currSeq.getNumBases();
812                                         
813                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
814                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
815                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
816                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
817                                         else                                            {       success = 1;                            }
818                                         
819                                         //you don't want to trim, if it fails above then scrap it
820                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
821                                         
822                                         if(!success)                            {       trashCode += 'q';       }
823                                 }                               
824                 
825                                 if(minLength > 0 || maxLength > 0){
826                                         success = cullLength(currSeq);
827                                         if(!success)                            {       trashCode += 'l';       }
828                                 }
829                                 if(maxHomoP > 0){
830                                         success = cullHomoP(currSeq);
831                                         if(!success)                            {       trashCode += 'h';       }
832                                 }
833                                 if(maxAmbig != -1){
834                                         success = cullAmbigs(currSeq);
835                                         if(!success)                            {       trashCode += 'n';       }
836                                 }
837                                 
838                                 if(flip){               // should go last                       
839                                         currSeq.reverseComplement();
840                                         if(qFileName != ""){
841                                                 currQual.flipQScores(); 
842                                         }
843                                 }
844                                 
845                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
846                 
847                                 if(trashCode.length() == 0){
848                     string thisGroup = "";
849                     if (createGroup) {
850                                                 if(numBarcodes != 0){
851                                                         thisGroup = barcodeNameVector[barcodeIndex];
852                                                         if (numFPrimers != 0) {
853                                                                 if (primerNameVector[primerIndex] != "") { 
854                                                                         if(thisGroup != "") {
855                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
856                                                                         }else {
857                                                                                 thisGroup = primerNameVector[primerIndex]; 
858                                                                         }
859                                                                 } 
860                                                         }
861                         }
862                     }
863                     
864                     int pos = thisGroup.find("ignore");
865                     if (pos == string::npos) {
866                         currSeq.setAligned(currSeq.getUnaligned());
867                         currSeq.printSequence(trimFASTAFile);
868                         
869                         if(qFileName != ""){
870                             currQual.printQScores(trimQualFile);
871                         }
872                         
873                         
874                         if(nameFile != ""){
875                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
876                             if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
877                             else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
878                         }
879                         
880                         int numRedundants = 0;
881                         if (countfile != "") {
882                             map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
883                             if (itCount != nameCount.end()) { 
884                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
885                                 numRedundants = itCount->second-1;
886                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
887                         }
888                         
889                         if (createGroup) {
890                             if(numBarcodes != 0){
891                                                                 
892                                 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
893                                 
894                                 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
895                                 else {   groupMap[currSeq.getName()] = thisGroup; }
896                                 
897                                 if (nameFile != "") {
898                                     map<string, string>::iterator itName = nameMap.find(currSeq.getName());
899                                     if (itName != nameMap.end()) { 
900                                         vector<string> thisSeqsNames; 
901                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
902                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
903                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
904                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
905                                         }
906                                     }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
907                                 }
908                                 
909                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
910                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
911                                 else { groupCounts[it->first] += (1 + numRedundants); }
912                                                                 
913                             }
914                         }
915                         
916                         if(allFiles){
917                             ofstream output;
918                             m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
919                             currSeq.printSequence(output);
920                             output.close();
921                             
922                             if(qFileName != ""){
923                                 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
924                                 currQual.printQScores(output);
925                                 output.close();                                                 
926                             }
927                             
928                             if(nameFile != ""){
929                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
930                                 if (itName != nameMap.end()) { 
931                                     m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
932                                     output << itName->first << '\t' << itName->second << endl; 
933                                     output.close();
934                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
935                             }
936                         }
937                     }
938                                 }
939                                 else{
940                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
941                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
942                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
943                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
944                                         }
945                     if (countfile != "") {
946                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
947                         if (itCount != nameCount.end()) { 
948                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
949                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
950                     }
951                     
952                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
953                                         currSeq.setUnaligned(origSeq);
954                                         currSeq.setAligned(origSeq);
955                                         currSeq.printSequence(scrapFASTAFile);
956                                         if(qFileName != ""){
957                                                 currQual.printQScores(scrapQualFile);
958                                         }
959                                 }
960                                 count++;
961                         }
962                         
963                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
964                                 unsigned long long pos = inFASTA.tellg();
965                                 if ((pos == -1) || (pos >= line.end)) { break; }
966                         
967                         #else
968                                 if (inFASTA.eof()) { break; }
969                         #endif
970                         
971                         //report progress
972                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
973                         
974                 }
975                 //report progress
976                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
977                 
978                 delete trimOligos;
979         if (reorient) { delete rtrimOligos; }
980                 inFASTA.close();
981                 trimFASTAFile.close();
982                 scrapFASTAFile.close();
983                 if (createGroup) {       outGroupsFile.close();   }
984                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
985                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
986         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
987                 
988                 return count;
989         }
990         catch(exception& e) {
991                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
992                 exit(1);
993         }
994 }
995
996 /**************************************************************************************************/
997
998 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
999         try {
1000         
1001         int process = 1;
1002                 int exitCommand = 1;
1003                 processIDS.clear();
1004                 
1005 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1006                                 //loop through and create all the processes you want
1007                 while (process != processors) {
1008                         int pid = fork();
1009                         
1010                         if (pid > 0) {
1011                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1012                                 process++;
1013                         }else if (pid == 0){
1014                                 
1015                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1016                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1017                                 vector<vector<string> > tempNameFileNames = nameFileNames;
1018
1019                                 if(allFiles){
1020                                         ofstream temp;
1021
1022                                         for(int i=0;i<tempFASTAFileNames.size();i++){
1023                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1024                                                         if (tempFASTAFileNames[i][j] != "") {
1025                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
1026                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1027
1028                                                                 if(qFileName != ""){
1029                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
1030                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1031                                                                 }
1032                                                                 if(nameFile != ""){
1033                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
1034                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1035                                                                 }
1036                                                         }
1037                                                 }
1038                                         }
1039                                 }
1040                                                         
1041                                 driverCreateTrim(filename,
1042                                                                  qFileName,
1043                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
1044                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
1045                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
1046                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
1047                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
1048                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
1049                                  (trimCountFileName + toString(getpid()) + ".temp"),
1050                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
1051                                                                  (groupFile + toString(getpid()) + ".temp"),
1052                                                                  tempFASTAFileNames,
1053                                                                  tempPrimerQualFileNames,
1054                                                                  tempNameFileNames,
1055                                                                  lines[process],
1056                                                                  qLines[process]);
1057                 
1058                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
1059                                 
1060                                 //pass groupCounts to parent
1061                                 if(createGroup){
1062                                         ofstream out;
1063                                         string tempFile = filename + toString(getpid()) + ".num.temp";
1064                                         m->openOutputFile(tempFile, out);
1065                                         
1066                                         out << groupCounts.size() << endl;
1067                                         
1068                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1069                                                 out << it->first << '\t' << it->second << endl;
1070                                         }
1071                     
1072                     out << groupMap.size() << endl;
1073                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1074                                                 out << it->first << '\t' << it->second << endl;
1075                                         }
1076                                         out.close();
1077                                 }
1078                                 exit(0);
1079                         }else { 
1080                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1081                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1082                                 exit(0);
1083                         }
1084                 }
1085                 
1086                 //parent do my part
1087                 ofstream temp;
1088                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1089                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1090                 if(qFileName != ""){
1091                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1092                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1093                 }
1094                 if (nameFile != "") {
1095                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1096                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1097                 }
1098         if (countfile != "") {
1099                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1100                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1101                 }
1102
1103                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1104                 
1105                 //force parent to wait until all the processes are done
1106                 for (int i=0;i<processIDS.size();i++) { 
1107                         int temp = processIDS[i];
1108                         wait(&temp);
1109                 }
1110 #else
1111         //////////////////////////////////////////////////////////////////////////////////////////////////////
1112                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1113                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1114                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1115                 
1116                 vector<trimData*> pDataArray; 
1117                 DWORD   dwThreadIdArray[processors-1];
1118                 HANDLE  hThreadArray[processors-1]; 
1119                 
1120                 //Create processor worker threads.
1121                 for( int h=0; h<processors-1; h++){
1122                         
1123             string extension = "";
1124                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1125             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1126             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1127             vector<vector<string> > tempNameFileNames = nameFileNames;
1128             
1129             if(allFiles){
1130                 ofstream temp;
1131                 
1132                 for(int i=0;i<tempFASTAFileNames.size();i++){
1133                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1134                         if (tempFASTAFileNames[i][j] != "") {
1135                             tempFASTAFileNames[i][j] += extension;
1136                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1137                             
1138                             if(qFileName != ""){
1139                                 tempPrimerQualFileNames[i][j] += extension;
1140                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1141                             }
1142                             if(nameFile != ""){
1143                                 tempNameFileNames[i][j] += extension;
1144                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1145                             }
1146                         }
1147                     }
1148                 }
1149             }
1150
1151             
1152                         trimData* tempTrim = new trimData(filename,
1153                                               qFileName, nameFile, countfile,
1154                                               (trimFASTAFileName+extension),
1155                                               (scrapFASTAFileName+extension),
1156                                               (trimQualFileName+extension),
1157                                               (scrapQualFileName+extension),
1158                                               (trimNameFileName+extension),
1159                                               (scrapNameFileName+extension),
1160                                               (trimCountFileName+extension),
1161                                               (scrapCountFileName+extension),
1162                                               (groupFile+extension),
1163                                               tempFASTAFileNames,
1164                                               tempPrimerQualFileNames,
1165                                               tempNameFileNames,
1166                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1167                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
1168                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1169                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1170                                              minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
1171                         pDataArray.push_back(tempTrim);
1172             
1173                         hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
1174                 }
1175         
1176         //parent do my part
1177                 ofstream temp;
1178                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1179                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1180                 if(qFileName != ""){
1181                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1182                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1183                 }
1184                 if (nameFile != "") {
1185                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1186                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1187                 }
1188         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1189         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1190         vector<vector<string> > tempNameFileNames = nameFileNames;
1191         if(allFiles){
1192             ofstream temp;
1193             string extension = toString(processors-1) + ".temp";
1194             for(int i=0;i<tempFASTAFileNames.size();i++){
1195                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1196                     if (tempFASTAFileNames[i][j] != "") {
1197                         tempFASTAFileNames[i][j] += extension;
1198                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1199                         
1200                         if(qFileName != ""){
1201                             tempPrimerQualFileNames[i][j] += extension;
1202                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
1203                         }
1204                         if(nameFile != ""){
1205                             tempNameFileNames[i][j] += extension;
1206                             m->openOutputFile(tempNameFileNames[i][j], temp);           temp.close();
1207                         }
1208                     }
1209                 }
1210             }
1211         }
1212         
1213                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1214         processIDS.push_back(processors-1);
1215
1216         
1217                 //Wait until all threads have terminated.
1218                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1219                 
1220                 //Close all thread handles and free memory allocations.
1221                 for(int i=0; i < pDataArray.size(); i++){
1222             if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1223                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1224             }
1225                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1226                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1227                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1228                 else { groupCounts[it->first] += it->second; }
1229             }
1230             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1231                 map<string, string>::iterator it2 = groupMap.find(it->first);
1232                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1233                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1234             }
1235             CloseHandle(hThreadArray[i]);
1236                         delete pDataArray[i];
1237                 }
1238         
1239 #endif          
1240         
1241         
1242         //append files
1243                 for(int i=0;i<processIDS.size();i++){
1244                         
1245                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1246                         
1247                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1248                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1249                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1250                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1251                         
1252                         if(qFileName != ""){
1253                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1254                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1255                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1256                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1257                         }
1258                         
1259                         if(nameFile != ""){
1260                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1261                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1262                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1263                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1264                         }
1265             
1266             if(countfile != ""){
1267                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1268                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1269                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1270                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1271                         }
1272                         
1273                         if((createGroup)&&(countfile == "")){
1274                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1275                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1276                         }
1277                         
1278                         
1279                         if(allFiles){
1280                                 for(int j=0;j<fastaFileNames.size();j++){
1281                                         for(int k=0;k<fastaFileNames[j].size();k++){
1282                                                 if (fastaFileNames[j][k] != "") {
1283                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1284                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1285                                                         
1286                                                         if(qFileName != ""){
1287                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1288                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1289                                                         }
1290                                                         
1291                                                         if(nameFile != ""){
1292                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1293                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1294                                                         }
1295                                                 }
1296                                         }
1297                                 }
1298                         }
1299                         
1300             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1301                         if(createGroup){
1302                                 ifstream in;
1303                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1304                                 m->openInputFile(tempFile, in);
1305                                 int tempNum;
1306                                 string group;
1307                                 
1308                                 in >> tempNum; m->gobble(in);
1309                                 
1310                                 if (tempNum != 0) {
1311                                         for (int i = 0; i < tempNum; i++) { 
1312                         int groupNum;
1313                                                 in >> group >> groupNum; m->gobble(in);
1314                         
1315                                                 map<string, int>::iterator it = groupCounts.find(group);
1316                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1317                                                 else { groupCounts[it->first] += groupNum; }
1318                                         }
1319                                 }
1320                 in >> tempNum; m->gobble(in);
1321                 if (tempNum != 0) {
1322                                         for (int i = 0; i < tempNum; i++) { 
1323                         string group, seqName;
1324                                                 in >> seqName >> group; m->gobble(in);
1325                         
1326                                                 map<string, string>::iterator it = groupMap.find(seqName);
1327                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1328                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1329                                         }
1330                                 }
1331                 
1332                                 in.close(); m->mothurRemove(tempFile);
1333                         }
1334             #endif
1335                 }
1336
1337         return exitCommand;
1338         }
1339         catch(exception& e) {
1340                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1341                 exit(1);
1342         }
1343 }
1344
1345 /**************************************************************************************************/
1346
1347 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1348         try {
1349         
1350         vector<unsigned long long> fastaFilePos;
1351                 vector<unsigned long long> qfileFilePos;
1352                 
1353                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1354                 //set file positions for fasta file
1355                 fastaFilePos = m->divideFile(filename, processors);
1356                 
1357                 //get name of first sequence in each chunk
1358                 map<string, int> firstSeqNames;
1359                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1360                         ifstream in;
1361                         m->openInputFile(filename, in);
1362                         in.seekg(fastaFilePos[i]);
1363                 
1364                         Sequence temp(in); 
1365                         firstSeqNames[temp.getName()] = i;
1366                 
1367                         in.close();
1368                 }
1369                 
1370                 if(qfilename != "")     {
1371             //seach for filePos of each first name in the qfile and save in qfileFilePos
1372             ifstream inQual;
1373             m->openInputFile(qfilename, inQual);
1374             
1375             string input;
1376             while(!inQual.eof()){       
1377                 input = m->getline(inQual);
1378                 
1379                 if (input.length() != 0) {
1380                     if(input[0] == '>'){ //this is a sequence name line
1381                         istringstream nameStream(input);
1382                         
1383                         string sname = "";  nameStream >> sname;
1384                         sname = sname.substr(1);
1385                         
1386                         for (int i = 0; i < sname.length(); i++) {
1387                             if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
1388                         }
1389                         
1390                         map<string, int>::iterator it = firstSeqNames.find(sname);
1391                         
1392                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1393                             unsigned long long pos = inQual.tellg(); 
1394                             qfileFilePos.push_back(pos - input.length() - 1);   
1395                             firstSeqNames.erase(it);
1396                         }
1397                     }
1398                 }
1399                 
1400                 if (firstSeqNames.size() == 0) { break; }
1401             }
1402             inQual.close();
1403             
1404             
1405             if (firstSeqNames.size() != 0) { 
1406                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1407                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1408                 }
1409                 qFileName = "";
1410                 return processors;
1411             }
1412             
1413             //get last file position of qfile
1414             FILE * pFile;
1415             unsigned long long size;
1416             
1417             //get num bytes in file
1418             pFile = fopen (qfilename.c_str(),"rb");
1419             if (pFile==NULL) perror ("Error opening file");
1420             else{
1421                 fseek (pFile, 0, SEEK_END);
1422                 size=ftell (pFile);
1423                 fclose (pFile);
1424             }
1425             
1426             qfileFilePos.push_back(size);
1427         }
1428         
1429         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1430             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1431                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1432                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1433                 }       
1434                 if(qfilename == "")     {       qLines = lines; } //files with duds
1435                 
1436                 return processors;
1437                 
1438                 #else
1439             
1440         if (processors == 1) { //save time
1441                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1442                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1443             lines.push_back(linePair(0, 1000));
1444             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1445         }else{
1446             int numFastaSeqs = 0;
1447             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1448             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1449         
1450             if (qfilename != "") { 
1451                 int numQualSeqs = 0;
1452                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1453                 
1454                 if (numFastaSeqs != numQualSeqs) {
1455                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1456                 }
1457             }
1458         
1459             //figure out how many sequences you have to process
1460             int numSeqsPerProcessor = numFastaSeqs / processors;
1461             for (int i = 0; i < processors; i++) {
1462                 int startIndex =  i * numSeqsPerProcessor;
1463                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1464                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1465                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1466             }
1467         }
1468             if(qfilename == "") {       qLines = lines; } //files with duds
1469                         return 1;
1470                 
1471                 #endif
1472         }
1473         catch(exception& e) {
1474                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1475                 exit(1);
1476         }
1477 }
1478
1479 //***************************************************************************************************************
1480
1481 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1482         try {
1483                 ifstream inOligos;
1484                 m->openInputFile(oligoFile, inOligos);
1485                 
1486                 ofstream test;
1487                 
1488                 string type, oligo, roligo, group;
1489         bool hasPrimer = false; bool hasPairedBarcodes = false;
1490
1491                 int indexPrimer = 0;
1492                 int indexBarcode = 0;
1493         int indexPairedPrimer = 0;
1494                 int indexPairedBarcode = 0;
1495         set<string> uniquePrimers;
1496         set<string> uniqueBarcodes;
1497                 
1498                 while(!inOligos.eof()){
1499
1500                         inOligos >> type; 
1501             
1502                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1503             
1504                         if(type[0] == '#'){
1505                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1506                                 m->gobble(inOligos);
1507                         }
1508                         else{
1509                                 m->gobble(inOligos);
1510                                 //make type case insensitive
1511                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1512                                 
1513                                 inOligos >> oligo;
1514                 
1515                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1516                                 
1517                                 for(int i=0;i<oligo.length();i++){
1518                                         oligo[i] = toupper(oligo[i]);
1519                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1520                                 }
1521                                 
1522                                 if(type == "FORWARD"){
1523                                         group = "";
1524                                         
1525                                         // get rest of line in case there is a primer name
1526                                         while (!inOligos.eof()) {       
1527                                                 char c = inOligos.get(); 
1528                                                 if (c == 10 || c == 13){        break;  }
1529                                                 else if (c == 32 || c == 9){;} //space or tab
1530                                                 else {  group += c;  }
1531                                         } 
1532                                         
1533                                         //check for repeat barcodes
1534                                         map<string, int>::iterator itPrime = primers.find(oligo);
1535                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1536                                         
1537                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1538                     
1539                                         primers[oligo]=indexPrimer; indexPrimer++;              
1540                                         primerNameVector.push_back(group);
1541                                 }
1542                 else if (type == "PRIMER"){
1543                     m->gobble(inOligos);
1544                                         
1545                     inOligos >> roligo;
1546                     
1547                     for(int i=0;i<roligo.length();i++){
1548                         roligo[i] = toupper(roligo[i]);
1549                         if(roligo[i] == 'U')    {       roligo[i] = 'T';        }
1550                     }
1551                     roligo = reverseOligo(roligo);
1552                     
1553                     group = "";
1554                     
1555                                         // get rest of line in case there is a primer name
1556                                         while (!inOligos.eof()) {
1557                                                 char c = inOligos.get();
1558                                                 if (c == 10 || c == 13){        break;  }
1559                                                 else if (c == 32 || c == 9){;} //space or tab
1560                                                 else {  group += c;  }
1561                                         }
1562                     
1563                     oligosPair newPrimer(oligo, roligo);
1564                                         
1565                                         //check for repeat barcodes
1566                     string tempPair = oligo+roligo;
1567                     if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
1568                     else { uniquePrimers.insert(tempPair); }
1569                                         
1570                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
1571                     
1572                                         pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
1573                                         primerNameVector.push_back(group);
1574                     hasPrimer = true;
1575                 }
1576                                 else if(type == "REVERSE"){
1577                                         //Sequence oligoRC("reverse", oligo);
1578                                         //oligoRC.reverseComplement();
1579                     string oligoRC = reverseOligo(oligo);
1580                                         revPrimer.push_back(oligoRC);
1581                                 }
1582                                 else if(type == "BARCODE"){
1583                                         inOligos >> group;
1584                     
1585                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1586                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1587                     
1588                     string temp = "";
1589                     while (!inOligos.eof())     {
1590                                                 char c = inOligos.get();
1591                                                 if (c == 10 || c == 13){        break;  }
1592                                                 else if (c == 32 || c == 9){;} //space or tab
1593                                                 else {  temp += c;  }
1594                                         }
1595                                         
1596                     //then this is illumina data with 4 columns
1597                     if (temp != "") {
1598                         hasPairedBarcodes = true;
1599                         string reverseBarcode = group; //reverseOligo(group); //reverse barcode
1600                         group = temp;
1601                         
1602                         for(int i=0;i<reverseBarcode.length();i++){
1603                             reverseBarcode[i] = toupper(reverseBarcode[i]);
1604                             if(reverseBarcode[i] == 'U')        {       reverseBarcode[i] = 'T';        }
1605                         }
1606                         
1607                         oligosPair newPair(oligo, reverseOligo(reverseBarcode));
1608                         
1609                         if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
1610                         
1611                         //check for repeat barcodes
1612                         string tempPair = oligo+reverseBarcode;
1613                         if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
1614                         else { uniqueBarcodes.insert(tempPair); }
1615                         
1616                         pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
1617                         barcodeNameVector.push_back(group);
1618                     }else {                             
1619                         //check for repeat barcodes
1620                         map<string, int>::iterator itBar = barcodes.find(oligo);
1621                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1622                         
1623                         barcodes[oligo]=indexBarcode; indexBarcode++;
1624                         barcodeNameVector.push_back(group);
1625                     }
1626                                 }else if(type == "LINKER"){
1627                                         linker.push_back(oligo);
1628                                 }else if(type == "SPACER"){
1629                                         spacer.push_back(oligo);
1630                                 }
1631                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1632                         }
1633                         m->gobble(inOligos);
1634                 }       
1635                 inOligos.close();
1636                 
1637         if (hasPairedBarcodes || hasPrimer) {
1638             pairedOligos = true;
1639             if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
1640         }
1641         
1642                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1643                 
1644                 //add in potential combos
1645                 if(barcodeNameVector.size() == 0){
1646                         barcodes[""] = 0;
1647                         barcodeNameVector.push_back("");                        
1648                 }
1649                 
1650                 if(primerNameVector.size() == 0){
1651                         primers[""] = 0;
1652                         primerNameVector.push_back("");                 
1653                 }
1654                 
1655                 fastaFileNames.resize(barcodeNameVector.size());
1656                 for(int i=0;i<fastaFileNames.size();i++){
1657                         fastaFileNames[i].assign(primerNameVector.size(), "");
1658                 }
1659                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1660                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1661                 
1662                 if(allFiles){
1663                         set<string> uniqueNames; //used to cleanup outputFileNames
1664             if (pairedOligos) {
1665                 for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
1666                     for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
1667                         
1668                         string primerName = primerNameVector[itPrimer->first];
1669                         string barcodeName = barcodeNameVector[itBar->first];
1670                         
1671                         if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
1672                         else {
1673                             string comboGroupName = "";
1674                             string fastaFileName = "";
1675                             string qualFileName = "";
1676                             string nameFileName = "";
1677                             string countFileName = "";
1678                             
1679                             if(primerName == ""){
1680                                 comboGroupName = barcodeNameVector[itBar->first];
1681                             }
1682                             else{
1683                                 if(barcodeName == ""){
1684                                     comboGroupName = primerNameVector[itPrimer->first];
1685                                 }
1686                                 else{
1687                                     comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
1688                                 }
1689                             }
1690                             
1691                             
1692                             ofstream temp;
1693                             map<string, string> variables;
1694                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1695                             variables["[tag]"] = comboGroupName;
1696                             fastaFileName = getOutputFileName("fasta", variables);
1697                             if (uniqueNames.count(fastaFileName) == 0) {
1698                                 outputNames.push_back(fastaFileName);
1699                                 outputTypes["fasta"].push_back(fastaFileName);
1700                                 uniqueNames.insert(fastaFileName);
1701                             }
1702                             
1703                             fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
1704                             m->openOutputFile(fastaFileName, temp);             temp.close();
1705                             
1706                             if(qFileName != ""){
1707                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1708                                 qualFileName = getOutputFileName("qfile", variables);
1709                                 if (uniqueNames.count(qualFileName) == 0) {
1710                                     outputNames.push_back(qualFileName);
1711                                     outputTypes["qfile"].push_back(qualFileName);
1712                                 }
1713                                 
1714                                 qualFileNames[itBar->first][itPrimer->first] = qualFileName;
1715                                 m->openOutputFile(qualFileName, temp);          temp.close();
1716                             }
1717                             
1718                             if(nameFile != ""){
1719                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1720                                 nameFileName = getOutputFileName("name", variables);
1721                                 if (uniqueNames.count(nameFileName) == 0) {
1722                                     outputNames.push_back(nameFileName);
1723                                     outputTypes["name"].push_back(nameFileName);
1724                                 }
1725                                 
1726                                 nameFileNames[itBar->first][itPrimer->first] = nameFileName;
1727                                 m->openOutputFile(nameFileName, temp);          temp.close();
1728                             }
1729                         }
1730                     }
1731                 }
1732             }else {
1733                 for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1734                     for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1735                         
1736                         string primerName = primerNameVector[itPrimer->second];
1737                         string barcodeName = barcodeNameVector[itBar->second];
1738                         
1739                         if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
1740                         else {
1741                             string comboGroupName = "";
1742                             string fastaFileName = "";
1743                             string qualFileName = "";
1744                             string nameFileName = "";
1745                             string countFileName = "";
1746                             
1747                             if(primerName == ""){
1748                                 comboGroupName = barcodeNameVector[itBar->second];
1749                             }
1750                             else{
1751                                 if(barcodeName == ""){
1752                                     comboGroupName = primerNameVector[itPrimer->second];
1753                                 }
1754                                 else{
1755                                     comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1756                                 }
1757                             }
1758                             
1759                             
1760                             ofstream temp;
1761                             map<string, string> variables; 
1762                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1763                             variables["[tag]"] = comboGroupName;
1764                             fastaFileName = getOutputFileName("fasta", variables);
1765                             if (uniqueNames.count(fastaFileName) == 0) {
1766                                 outputNames.push_back(fastaFileName);
1767                                 outputTypes["fasta"].push_back(fastaFileName);
1768                                 uniqueNames.insert(fastaFileName);
1769                             }
1770                             
1771                             fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1772                             m->openOutputFile(fastaFileName, temp);             temp.close();
1773                             
1774                             if(qFileName != ""){
1775                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1776                                 qualFileName = getOutputFileName("qfile", variables);
1777                                 if (uniqueNames.count(qualFileName) == 0) {
1778                                     outputNames.push_back(qualFileName);
1779                                     outputTypes["qfile"].push_back(qualFileName);
1780                                 }
1781                                 
1782                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1783                                 m->openOutputFile(qualFileName, temp);          temp.close();
1784                             }
1785                             
1786                             if(nameFile != ""){
1787                                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1788                                 nameFileName = getOutputFileName("name", variables);
1789                                 if (uniqueNames.count(nameFileName) == 0) {
1790                                     outputNames.push_back(nameFileName);
1791                                     outputTypes["name"].push_back(nameFileName);
1792                                 }
1793                                 
1794                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1795                                 m->openOutputFile(nameFileName, temp);          temp.close();
1796                             }
1797                         }
1798                     }
1799                 }
1800             }
1801                 }
1802                 numFPrimers = primers.size();
1803         if (pairedOligos) { numFPrimers  = pairedPrimers.size(); }
1804                 numRPrimers = revPrimer.size();
1805         numLinkers = linker.size();
1806         numSpacers = spacer.size();
1807                 
1808                 bool allBlank = true;
1809                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1810                         if (barcodeNameVector[i] != "") {
1811                                 allBlank = false;
1812                                 break;
1813                         }
1814                 }
1815                 for (int i = 0; i < primerNameVector.size(); i++) {
1816                         if (primerNameVector[i] != "") {
1817                                 allBlank = false;
1818                                 break;
1819                         }
1820                 }
1821
1822                 if (allBlank) {
1823                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1824                         allFiles = false;
1825                         return false;
1826                 }
1827                 
1828                 return true;
1829                 
1830         }
1831         catch(exception& e) {
1832                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1833                 exit(1);
1834         }
1835 }
1836 //***************************************************************************************************************
1837
1838 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1839         try {
1840                 bool success = 1;
1841                 if(qscores.getName() != ""){
1842                         qscores.trimQScores(-1, keepFirst);
1843                 }
1844                 sequence.trim(keepFirst);
1845                 return success;
1846         }
1847         catch(exception& e) {
1848                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1849                 exit(1);
1850         }
1851         
1852 }       
1853
1854 //***************************************************************************************************************
1855
1856 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1857         try {
1858                 bool success = 0;
1859                 
1860                 int length = sequence.getNumBases() - removeLast;
1861                 
1862                 if(length > 0){
1863                         if(qscores.getName() != ""){
1864                                 qscores.trimQScores(-1, length);
1865                         }
1866                         sequence.trim(length);
1867                         success = 1;
1868                 }
1869                 else{
1870                         success = 0;
1871                 }
1872
1873                 return success;
1874         }
1875         catch(exception& e) {
1876                 m->errorOut(e, "removeLastTrim", "countDiffs");
1877                 exit(1);
1878         }
1879         
1880 }       
1881
1882 //***************************************************************************************************************
1883
1884 bool TrimSeqsCommand::cullLength(Sequence& seq){
1885         try {
1886         
1887                 int length = seq.getNumBases();
1888                 bool success = 0;       //guilty until proven innocent
1889                 
1890                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1891                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1892                 else                                                                                            {       success = 0;    }
1893                 
1894                 return success;
1895         
1896         }
1897         catch(exception& e) {
1898                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1899                 exit(1);
1900         }
1901         
1902 }
1903
1904 //***************************************************************************************************************
1905
1906 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1907         try {
1908                 int longHomoP = seq.getLongHomoPolymer();
1909                 bool success = 0;       //guilty until proven innocent
1910                 
1911                 if(longHomoP <= maxHomoP){      success = 1;    }
1912                 else                                    {       success = 0;    }
1913                 
1914                 return success;
1915         }
1916         catch(exception& e) {
1917                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1918                 exit(1);
1919         }
1920         
1921 }
1922 //********************************************************************/
1923 string TrimSeqsCommand::reverseOligo(string oligo){
1924         try {
1925         string reverse = "";
1926         
1927         for(int i=oligo.length()-1;i>=0;i--){
1928             
1929             if(oligo[i] == 'A')         {       reverse += 'T'; }
1930             else if(oligo[i] == 'T'){   reverse += 'A'; }
1931             else if(oligo[i] == 'U'){   reverse += 'A'; }
1932             
1933             else if(oligo[i] == 'G'){   reverse += 'C'; }
1934             else if(oligo[i] == 'C'){   reverse += 'G'; }
1935             
1936             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1937             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1938             
1939             else if(oligo[i] == 'M'){   reverse += 'K'; }
1940             else if(oligo[i] == 'K'){   reverse += 'M'; }
1941             
1942             else if(oligo[i] == 'W'){   reverse += 'W'; }
1943             else if(oligo[i] == 'S'){   reverse += 'S'; }
1944             
1945             else if(oligo[i] == 'B'){   reverse += 'V'; }
1946             else if(oligo[i] == 'V'){   reverse += 'B'; }
1947             
1948             else if(oligo[i] == 'D'){   reverse += 'H'; }
1949             else if(oligo[i] == 'H'){   reverse += 'D'; }
1950             
1951             else                                                {       reverse += 'N'; }
1952         }
1953         
1954         
1955         return reverse;
1956     }
1957         catch(exception& e) {
1958                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1959                 exit(1);
1960         }
1961 }
1962
1963 //***************************************************************************************************************
1964
1965 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1966         try {
1967                 int numNs = seq.getAmbigBases();
1968                 bool success = 0;       //guilty until proven innocent
1969                 
1970                 if(numNs <= maxAmbig)   {       success = 1;    }
1971                 else                                    {       success = 0;    }
1972                 
1973                 return success;
1974         }
1975         catch(exception& e) {
1976                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1977                 exit(1);
1978         }
1979         
1980 }
1981 //***************************************************************************************************************