]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added checks to make sure windows processes completed their tasks.
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","group",false,false,true); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false,true); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
24                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
25                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
26                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
27                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxlength);
28                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
29                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
30         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
31                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
32         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
34                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
35                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pkeepforward);
36                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pqtrim);
37                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqthreshold);
38                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqaverage);
39                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(prollaverage);
40                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pqwindowaverage);
41                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pqstepsize);
42                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "","",false,false); parameters.push_back(pqwindowsize);
43                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pkeepfirst);
44                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "","",false,false); parameters.push_back(premovelast);
45                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
46                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
47                         
48                 vector<string> myArray;
49                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string TrimSeqsCommand::getHelpString(){        
59         try {
60                 string helpString = "";
61                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
62                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
63                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
64                 helpString += "The fasta parameter is required.\n";
65                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
66                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
67                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
68         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
69                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
70                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
71                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
72                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
73                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
74                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
75                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
76         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
77                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
78                 helpString += "The qfile parameter allows you to provide a quality file.\n";
79                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
80                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
81                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
82                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
83                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
84                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
85                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
86                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
87                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
88                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
89                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
90                 helpString += "The trim.seqs command should be in the following format: \n";
91                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
92                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
93                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
95                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96                 return helpString;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 string TrimSeqsCommand::getOutputPattern(string type) {
105     try {
106         string pattern = "";
107         
108         if (type == "qfile") {  pattern = "[filename],[tag],qual"; } 
109         else if (type == "fasta") {  pattern = "[filename],[tag],fasta"; } 
110         else if (type == "group") {  pattern = "[filename],groups"; }
111         else if (type == "name") {  pattern = "[filename],[tag],names"; }
112         else if (type == "count") {  pattern = "[filename],[tag],count_table-[filename],count_table"; }
113         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
114         
115         return pattern;
116     }
117     catch(exception& e) {
118         m->errorOut(e, "TrimSeqsCommand", "getOutputPattern");
119         exit(1);
120     }
121 }
122 //**********************************************************************************************************************
123
124 TrimSeqsCommand::TrimSeqsCommand(){     
125         try {
126                 abort = true; calledHelp = true; 
127                 setParameters();
128                 vector<string> tempOutNames;
129                 outputTypes["fasta"] = tempOutNames;
130                 outputTypes["qfile"] = tempOutNames;
131                 outputTypes["group"] = tempOutNames;
132                 outputTypes["name"] = tempOutNames;
133         outputTypes["count"] = tempOutNames;
134         }
135         catch(exception& e) {
136                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
137                 exit(1);
138         }
139 }
140 //***************************************************************************************************************
141
142 TrimSeqsCommand::TrimSeqsCommand(string option)  {
143         try {
144                 
145                 abort = false; calledHelp = false;   
146                 comboStarts = 0;
147                 
148                 //allow user to run help
149                 if(option == "help") { help(); abort = true; calledHelp = true; }
150                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
151                 
152                 else {
153                         vector<string> myArray = setParameters();
154                         
155                         OptionParser parser(option);
156                         map<string,string> parameters = parser.getParameters();
157                         
158                         ValidParameters validParameter;
159                         map<string,string>::iterator it;
160                         
161                         //check to make sure all parameters are valid for command
162                         for (it = parameters.begin(); it != parameters.end(); it++) { 
163                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
164                         }
165                         
166                         //initialize outputTypes
167                         vector<string> tempOutNames;
168                         outputTypes["fasta"] = tempOutNames;
169                         outputTypes["qfile"] = tempOutNames;
170                         outputTypes["group"] = tempOutNames;
171                         outputTypes["name"] = tempOutNames;
172             outputTypes["count"] = tempOutNames;
173                         
174                         //if the user changes the input directory command factory will send this info to us in the output parameter 
175                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
176                         if (inputDir == "not found"){   inputDir = "";          }
177                         else {
178                                 string path;
179                                 it = parameters.find("fasta");
180                                 //user has given a template file
181                                 if(it != parameters.end()){ 
182                                         path = m->hasPath(it->second);
183                                         //if the user has not given a path then, add inputdir. else leave path alone.
184                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
185                                 }
186                                 
187                                 it = parameters.find("oligos");
188                                 //user has given a template file
189                                 if(it != parameters.end()){ 
190                                         path = m->hasPath(it->second);
191                                         //if the user has not given a path then, add inputdir. else leave path alone.
192                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
193                                 }
194                                 
195                                 it = parameters.find("qfile");
196                                 //user has given a template file
197                                 if(it != parameters.end()){ 
198                                         path = m->hasPath(it->second);
199                                         //if the user has not given a path then, add inputdir. else leave path alone.
200                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
201                                 }
202                                 
203                                 it = parameters.find("name");
204                                 //user has given a template file
205                                 if(it != parameters.end()){ 
206                                         path = m->hasPath(it->second);
207                                         //if the user has not given a path then, add inputdir. else leave path alone.
208                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
209                                 }
210                 
211                 it = parameters.find("count");
212                                 //user has given a template file
213                                 if(it != parameters.end()){ 
214                                         path = m->hasPath(it->second);
215                                         //if the user has not given a path then, add inputdir. else leave path alone.
216                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
217                                 }
218                                 
219                         }
220
221                         
222                         //check for required parameters
223                         fastaFile = validParameter.validFile(parameters, "fasta", true);
224                         if (fastaFile == "not found") {                                 
225                                 fastaFile = m->getFastaFile(); 
226                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
227                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
228                         }else if (fastaFile == "not open") { abort = true; }    
229                         else { m->setFastaFile(fastaFile); }
230                         
231                         //if the user changes the output directory command factory will send this info to us in the output parameter 
232                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
233                                 outputDir = ""; 
234                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
235                         }
236                 
237                         
238                         //check for optional parameter and set defaults
239                         // ...at some point should added some additional type checking...
240                         string temp;
241                         temp = validParameter.validFile(parameters, "flip", false);
242                         if (temp == "not found")    {   flip = 0;       }
243                         else {  flip = m->isTrue(temp);         }
244                 
245                         temp = validParameter.validFile(parameters, "oligos", true);
246                         if (temp == "not found"){       oligoFile = "";         }
247                         else if(temp == "not open"){    abort = true;   } 
248                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
249                         
250                         
251                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
252                         m->mothurConvert(temp, maxAmbig);  
253
254                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
255                         m->mothurConvert(temp, maxHomoP);  
256
257                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
258                         m->mothurConvert(temp, minLength); 
259                         
260                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, maxLength);
262                         
263                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
264                         m->mothurConvert(temp, bdiffs);
265                         
266                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
267                         m->mothurConvert(temp, pdiffs);
268             
269             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
270                         m->mothurConvert(temp, ldiffs);
271             
272             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
273                         m->mothurConvert(temp, sdiffs);
274                         
275                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
276                         m->mothurConvert(temp, tdiffs);
277                         
278                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
279                         
280                         temp = validParameter.validFile(parameters, "qfile", true);     
281                         if (temp == "not found")        {       qFileName = "";         }
282                         else if(temp == "not open")     {       abort = true;           }
283                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
284                         
285                         temp = validParameter.validFile(parameters, "name", true);      
286                         if (temp == "not found")        {       nameFile = "";          }
287                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
288                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
289             
290             countfile = validParameter.validFile(parameters, "count", true);
291                         if (countfile == "not open") { abort = true; countfile = ""; }  
292                         else if (countfile == "not found") { countfile = ""; }
293                         else { m->setCountTableFile(countfile); }
294                         
295             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
296                         
297                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
298                         m->mothurConvert(temp, qThreshold);
299                         
300                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
301                         qtrim = m->isTrue(temp);
302
303                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
304                         convert(temp, qRollAverage);
305
306                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
307                         convert(temp, qWindowAverage);
308
309                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
310                         convert(temp, qWindowSize);
311
312                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
313                         convert(temp, qWindowStep);
314
315                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
316                         convert(temp, qAverage);
317
318                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
319                         convert(temp, keepFirst);
320
321                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
322                         convert(temp, removeLast);
323                         
324                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
325                         allFiles = m->isTrue(temp);
326             
327             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
328                         keepforward = m->isTrue(temp);
329                         
330                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
331                         m->setProcessors(temp);
332                         m->mothurConvert(temp, processors); 
333                         
334                         
335                         if(allFiles && (oligoFile == "")){
336                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
337                         }
338                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
339                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
340                                 qAverage=0;
341                                 qThreshold=0;
342                         }
343                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
344                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
345                                 abort = true;
346                         }
347                         
348             if (countfile == "") {
349                 if (nameFile == "") {
350                     vector<string> files; files.push_back(fastaFile);
351                     parser.getNameFile(files);
352                 }
353             }
354                 }
355
356         }
357         catch(exception& e) {
358                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
359                 exit(1);
360         }
361 }
362 //***************************************************************************************************************
363
364 int TrimSeqsCommand::execute(){
365         try{
366         
367                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
368                 
369                 numFPrimers = 0;  //this needs to be initialized
370                 numRPrimers = 0;
371         numSpacers = 0;
372         numLinkers = 0;
373                 createGroup = false;
374                 vector<vector<string> > fastaFileNames;
375                 vector<vector<string> > qualFileNames;
376                 vector<vector<string> > nameFileNames;
377                 
378         map<string, string> variables; 
379                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
380         variables["[tag]"] = "trim";
381                 string trimSeqFile = getOutputFileName("fasta",variables);
382         string trimQualFile = getOutputFileName("qfile",variables);
383                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
384         
385         variables["[tag]"] = "scrap";
386                 string scrapSeqFile = getOutputFileName("fasta",variables);
387         string scrapQualFile = getOutputFileName("qfile",variables);
388                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
389                 
390                 if (qFileName != "") {
391                         outputNames.push_back(trimQualFile);
392                         outputNames.push_back(scrapQualFile);
393                         outputTypes["qfile"].push_back(trimQualFile);
394                         outputTypes["qfile"].push_back(scrapQualFile); 
395                 }
396                 
397         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
398         variables["[tag]"] = "trim";
399                 string trimNameFile = getOutputFileName("name",variables);
400         variables["[tag]"] = "scrap";
401                 string scrapNameFile = getOutputFileName("name",variables);
402                 
403                 if (nameFile != "") {
404                         m->readNames(nameFile, nameMap);
405                         outputNames.push_back(trimNameFile);
406                         outputNames.push_back(scrapNameFile);
407                         outputTypes["name"].push_back(trimNameFile);
408                         outputTypes["name"].push_back(scrapNameFile); 
409                 }
410         
411         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
412         variables["[tag]"] = "trim";
413         string trimCountFile = getOutputFileName("count",variables);
414         variables["[tag]"] = "scrap";
415                 string scrapCountFile = getOutputFileName("count",variables);
416                 
417                 if (countfile != "") {
418             CountTable ct;
419             ct.readTable(countfile);
420             nameCount = ct.getNameMap();
421                         outputNames.push_back(trimCountFile);
422                         outputNames.push_back(scrapCountFile);
423                         outputTypes["count"].push_back(trimCountFile);
424                         outputTypes["count"].push_back(scrapCountFile); 
425                 }
426
427                 
428                 if (m->control_pressed) { return 0; }
429                 
430                 string outputGroupFileName;
431                 if(oligoFile != ""){
432                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
433                         if ((createGroup) && (countfile == "")){
434                 map<string, string> myvariables; 
435                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
436                                 outputGroupFileName = getOutputFileName("group",myvariables);
437                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
438                         }
439                 }
440        
441         //fills lines and qlines
442                 setLines(fastaFile, qFileName);
443                 
444         if(processors == 1){
445             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
446         }else{
447             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
448         }       
449                 
450                 
451                 if (m->control_pressed) {  return 0; }                  
452         
453                 if(allFiles){
454                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
455                         map<string, string>::iterator it;
456                         set<string> namesToRemove;
457                         for(int i=0;i<fastaFileNames.size();i++){
458                                 for(int j=0;j<fastaFileNames[0].size();j++){
459                                         if (fastaFileNames[i][j] != "") {
460                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
461                                                         if(m->isBlank(fastaFileNames[i][j])){
462                                                                 m->mothurRemove(fastaFileNames[i][j]);
463                                                                 namesToRemove.insert(fastaFileNames[i][j]);
464                                                         
465                                                                 if(qFileName != ""){
466                                                                         m->mothurRemove(qualFileNames[i][j]);
467                                                                         namesToRemove.insert(qualFileNames[i][j]);
468                                                                 }
469                                                                 
470                                                                 if(nameFile != ""){
471                                                                         m->mothurRemove(nameFileNames[i][j]);
472                                                                         namesToRemove.insert(nameFileNames[i][j]);
473                                                                 }
474                                                         }else{  
475                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
476                                                                 if (it == uniqueFastaNames.end()) {     
477                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
478                                                                 }       
479                                                         }
480                                                 }
481                                         }
482                                 }
483                         }
484                         
485                         //remove names for outputFileNames, just cleans up the output
486                         vector<string> outputNames2;
487                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
488                         outputNames = outputNames2;
489                         
490             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
491                 ifstream in;
492                 m->openInputFile(it->first, in);
493                 
494                 ofstream out;
495                 map<string, string> myvariables; 
496                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(it->first));
497                 string thisGroupName = "";
498                 if (countfile == "") { thisGroupName = getOutputFileName("group",myvariables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
499                 else {  thisGroupName = getOutputFileName("count",myvariables); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
500                 m->openOutputFile(thisGroupName, out);
501                 
502                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
503                 
504                 while (!in.eof()){
505                     if (m->control_pressed) { break; }
506                     
507                     Sequence currSeq(in); m->gobble(in);
508                     if (countfile == "") {  
509                         out << currSeq.getName() << '\t' << it->second << endl;  
510                         
511                         if (nameFile != "") {
512                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
513                             if (itName != nameMap.end()) { 
514                                 vector<string> thisSeqsNames; 
515                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
516                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
517                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
518                                 }
519                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
520                         }
521                     }else { 
522                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
523                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
524                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
525                     }
526                 }
527                 in.close();
528                 out.close();
529             }
530             
531             if (countfile != "") { //create countfile with group info included
532                 CountTable* ct = new CountTable();
533                 ct->readTable(trimCountFile);
534                 map<string, int> justTrimmedNames = ct->getNameMap();
535                 delete ct;
536                 
537                 CountTable newCt;
538                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
539                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
540                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
541                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
542                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
543                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
544                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
545                 }
546                 newCt.printTable(trimCountFile);
547             }
548                 }
549                 
550                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
551
552                 //output group counts
553                 m->mothurOutEndLine();
554                 int total = 0;
555                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
556                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
557                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
558                 }
559                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
560                 
561                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
562
563                 //set fasta file as new current fastafile
564                 string current = "";
565                 itTypes = outputTypes.find("fasta");
566                 if (itTypes != outputTypes.end()) {
567                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
568                 }
569                 
570                 itTypes = outputTypes.find("name");
571                 if (itTypes != outputTypes.end()) {
572                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
573                 }
574                 
575                 itTypes = outputTypes.find("qfile");
576                 if (itTypes != outputTypes.end()) {
577                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
578                 }
579                 
580                 itTypes = outputTypes.find("group");
581                 if (itTypes != outputTypes.end()) {
582                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
583                 }
584         
585         itTypes = outputTypes.find("count");
586                 if (itTypes != outputTypes.end()) {
587                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
588                 }
589
590                 m->mothurOutEndLine();
591                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
592                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
593                 m->mothurOutEndLine();
594                 
595                 return 0;       
596                         
597         }
598         catch(exception& e) {
599                 m->errorOut(e, "TrimSeqsCommand", "execute");
600                 exit(1);
601         }
602 }
603                 
604 /**************************************************************************************/
605 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
606                 
607         try {
608                 
609                 ofstream trimFASTAFile;
610                 m->openOutputFile(trimFileName, trimFASTAFile);
611                 
612                 ofstream scrapFASTAFile;
613                 m->openOutputFile(scrapFileName, scrapFASTAFile);
614                 
615                 ofstream trimQualFile;
616                 ofstream scrapQualFile;
617                 if(qFileName != ""){
618                         m->openOutputFile(trimQFileName, trimQualFile);
619                         m->openOutputFile(scrapQFileName, scrapQualFile);
620                 }
621                 
622                 ofstream trimNameFile;
623                 ofstream scrapNameFile;
624                 if(nameFile != ""){
625                         m->openOutputFile(trimNFileName, trimNameFile);
626                         m->openOutputFile(scrapNFileName, scrapNameFile);
627                 }
628                 
629         ofstream trimCountFile;
630                 ofstream scrapCountFile;
631                 if(countfile != ""){
632                         m->openOutputFile(trimCFileName, trimCountFile);
633                         m->openOutputFile(scrapCFileName, scrapCountFile);
634             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
635                 }
636                 
637                 ofstream outGroupsFile;
638                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
639                 if(allFiles){
640                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
641                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
642                                         if (fastaFileNames[i][j] != "") {
643                                                 ofstream temp;
644                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
645                                                 if(qFileName != ""){
646                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
647                                                 }
648                                                 
649                                                 if(nameFile != ""){
650                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
651                                                 }
652                                         }
653                                 }
654                         }
655                 }
656                 
657                 ifstream inFASTA;
658                 m->openInputFile(filename, inFASTA);
659                 inFASTA.seekg(line.start);
660                 
661                 ifstream qFile;
662                 if(qFileName != "")     {
663                         m->openInputFile(qFileName, qFile);
664                         qFile.seekg(qline.start);  
665                 }
666                 
667                 int count = 0;
668                 bool moreSeqs = 1;
669                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
670         
671                 while (moreSeqs) {
672                                 
673                         if (m->control_pressed) { 
674                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
675                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
676                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
677                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
678                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
679                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
680                         }
681                         
682                         int success = 1;
683                         string trashCode = "";
684                         int currentSeqsDiffs = 0;
685
686                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
687                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
688             
689                         QualityScores currQual;
690                         if(qFileName != ""){
691                                 currQual = QualityScores(qFile);  m->gobble(qFile);
692                 //cout << currQual.getName() << endl;
693                         }
694                         
695                         string origSeq = currSeq.getUnaligned();
696                         if (origSeq != "") {
697                                 
698                                 int barcodeIndex = 0;
699                                 int primerIndex = 0;
700                                 
701                 if(numLinkers != 0){
702                                         success = trimOligos.stripLinker(currSeq, currQual);
703                                         if(success > ldiffs)            {       trashCode += 'k';       }
704                                         else{ currentSeqsDiffs += success;  }
705
706                                 }
707                 
708                                 if(barcodes.size() != 0){
709                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
710                                         if(success > bdiffs)            {       trashCode += 'b';       }
711                                         else{ currentSeqsDiffs += success;  }
712                                 }
713                                 
714                 if(numSpacers != 0){
715                                         success = trimOligos.stripSpacer(currSeq, currQual);
716                                         if(success > sdiffs)            {       trashCode += 's';       }
717                                         else{ currentSeqsDiffs += success;  }
718
719                                 }
720                 
721                                 if(numFPrimers != 0){
722                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
723                                         if(success > pdiffs)            {       trashCode += 'f';       }
724                                         else{ currentSeqsDiffs += success;  }
725                                 }
726                                 
727                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
728                                 
729                                 if(numRPrimers != 0){
730                                         success = trimOligos.stripReverse(currSeq, currQual);
731                                         if(!success)                            {       trashCode += 'r';       }
732                                 }
733
734                                 if(keepFirst != 0){
735                                         success = keepFirstTrim(currSeq, currQual);
736                                 }
737                                 
738                                 if(removeLast != 0){
739                                         success = removeLastTrim(currSeq, currQual);
740                                         if(!success)                            {       trashCode += 'l';       }
741                                 }
742
743                                 
744                                 if(qFileName != ""){
745                                         int origLength = currSeq.getNumBases();
746                                         
747                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
748                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
749                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
750                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
751                                         else                                            {       success = 1;                            }
752                                         
753                                         //you don't want to trim, if it fails above then scrap it
754                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
755                                         
756                                         if(!success)                            {       trashCode += 'q';       }
757                                 }                               
758                 
759                                 if(minLength > 0 || maxLength > 0){
760                                         success = cullLength(currSeq);
761                                         if(!success)                            {       trashCode += 'l';       }
762                                 }
763                                 if(maxHomoP > 0){
764                                         success = cullHomoP(currSeq);
765                                         if(!success)                            {       trashCode += 'h';       }
766                                 }
767                                 if(maxAmbig != -1){
768                                         success = cullAmbigs(currSeq);
769                                         if(!success)                            {       trashCode += 'n';       }
770                                 }
771                                 
772                                 if(flip){               // should go last                       
773                                         currSeq.reverseComplement();
774                                         if(qFileName != ""){
775                                                 currQual.flipQScores(); 
776                                         }
777                                 }
778                                 
779                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
780                 
781                                 if(trashCode.length() == 0){
782                     string thisGroup = "";
783                     if (createGroup) {
784                                                 if(barcodes.size() != 0){
785                                                         thisGroup = barcodeNameVector[barcodeIndex];
786                                                         if (primers.size() != 0) { 
787                                                                 if (primerNameVector[primerIndex] != "") { 
788                                                                         if(thisGroup != "") {
789                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
790                                                                         }else {
791                                                                                 thisGroup = primerNameVector[primerIndex]; 
792                                                                         }
793                                                                 } 
794                                                         }
795                         }
796                     }
797                     
798                     int pos = thisGroup.find("ignore");
799                     if (pos == string::npos) {
800                         currSeq.setAligned(currSeq.getUnaligned());
801                         currSeq.printSequence(trimFASTAFile);
802                         
803                         if(qFileName != ""){
804                             currQual.printQScores(trimQualFile);
805                         }
806                         
807                         
808                         if(nameFile != ""){
809                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
810                             if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
811                             else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
812                         }
813                         
814                         int numRedundants = 0;
815                         if (countfile != "") {
816                             map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
817                             if (itCount != nameCount.end()) { 
818                                 trimCountFile << itCount->first << '\t' << itCount->second << endl;
819                                 numRedundants = itCount->second-1;
820                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
821                         }
822                         
823                         if (createGroup) {
824                             if(barcodes.size() != 0){
825                                                                 
826                                 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
827                                 
828                                 if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
829                                 else {   groupMap[currSeq.getName()] = thisGroup; }
830                                 
831                                 if (nameFile != "") {
832                                     map<string, string>::iterator itName = nameMap.find(currSeq.getName());
833                                     if (itName != nameMap.end()) { 
834                                         vector<string> thisSeqsNames; 
835                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
836                                         numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
837                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
838                                             outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
839                                         }
840                                     }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
841                                 }
842                                 
843                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
844                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
845                                 else { groupCounts[it->first] += (1 + numRedundants); }
846                                                                 
847                             }
848                         }
849                         
850                         if(allFiles){
851                             ofstream output;
852                             m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
853                             currSeq.printSequence(output);
854                             output.close();
855                             
856                             if(qFileName != ""){
857                                 m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
858                                 currQual.printQScores(output);
859                                 output.close();                                                 
860                             }
861                             
862                             if(nameFile != ""){
863                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
864                                 if (itName != nameMap.end()) { 
865                                     m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
866                                     output << itName->first << '\t' << itName->second << endl; 
867                                     output.close();
868                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
869                             }
870                         }
871                     }
872                                 }
873                                 else{
874                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
875                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
876                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
877                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
878                                         }
879                     if (countfile != "") {
880                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
881                         if (itCount != nameCount.end()) { 
882                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
883                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
884                     }
885                     
886                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
887                                         currSeq.setUnaligned(origSeq);
888                                         currSeq.setAligned(origSeq);
889                                         currSeq.printSequence(scrapFASTAFile);
890                                         if(qFileName != ""){
891                                                 currQual.printQScores(scrapQualFile);
892                                         }
893                                 }
894                                 count++;
895                         }
896                         
897                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
898                                 unsigned long long pos = inFASTA.tellg();
899                                 if ((pos == -1) || (pos >= line.end)) { break; }
900                         
901                         #else
902                                 if (inFASTA.eof()) { break; }
903                         #endif
904                         
905                         //report progress
906                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
907                         
908                 }
909                 //report progress
910                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
911                 
912                 
913                 inFASTA.close();
914                 trimFASTAFile.close();
915                 scrapFASTAFile.close();
916                 if (createGroup) {       outGroupsFile.close();   }
917                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
918                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
919         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
920                 
921                 return count;
922         }
923         catch(exception& e) {
924                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
925                 exit(1);
926         }
927 }
928
929 /**************************************************************************************************/
930
931 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
932         try {
933         
934         int process = 1;
935                 int exitCommand = 1;
936                 processIDS.clear();
937                 
938 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
939                                 //loop through and create all the processes you want
940                 while (process != processors) {
941                         int pid = fork();
942                         
943                         if (pid > 0) {
944                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
945                                 process++;
946                         }else if (pid == 0){
947                                 
948                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
949                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
950                                 vector<vector<string> > tempNameFileNames = nameFileNames;
951
952                                 if(allFiles){
953                                         ofstream temp;
954
955                                         for(int i=0;i<tempFASTAFileNames.size();i++){
956                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
957                                                         if (tempFASTAFileNames[i][j] != "") {
958                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
959                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
960
961                                                                 if(qFileName != ""){
962                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
963                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
964                                                                 }
965                                                                 if(nameFile != ""){
966                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
967                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
968                                                                 }
969                                                         }
970                                                 }
971                                         }
972                                 }
973                                                         
974                                 driverCreateTrim(filename,
975                                                                  qFileName,
976                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
977                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
978                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
979                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
980                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
981                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
982                                  (trimCountFileName + toString(getpid()) + ".temp"),
983                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
984                                                                  (groupFile + toString(getpid()) + ".temp"),
985                                                                  tempFASTAFileNames,
986                                                                  tempPrimerQualFileNames,
987                                                                  tempNameFileNames,
988                                                                  lines[process],
989                                                                  qLines[process]);
990                 
991                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
992                                 
993                                 //pass groupCounts to parent
994                                 if(createGroup){
995                                         ofstream out;
996                                         string tempFile = filename + toString(getpid()) + ".num.temp";
997                                         m->openOutputFile(tempFile, out);
998                                         
999                                         out << groupCounts.size() << endl;
1000                                         
1001                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
1002                                                 out << it->first << '\t' << it->second << endl;
1003                                         }
1004                     
1005                     out << groupMap.size() << endl;
1006                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
1007                                                 out << it->first << '\t' << it->second << endl;
1008                                         }
1009                                         out.close();
1010                                 }
1011                                 exit(0);
1012                         }else { 
1013                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1014                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1015                                 exit(0);
1016                         }
1017                 }
1018                 
1019                 //parent do my part
1020                 ofstream temp;
1021                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1022                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1023                 if(qFileName != ""){
1024                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1025                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1026                 }
1027                 if (nameFile != "") {
1028                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1029                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1030                 }
1031         if (countfile != "") {
1032                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1033                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1034                 }
1035
1036                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1037                 
1038                 //force parent to wait until all the processes are done
1039                 for (int i=0;i<processIDS.size();i++) { 
1040                         int temp = processIDS[i];
1041                         wait(&temp);
1042                 }
1043 #else
1044         //////////////////////////////////////////////////////////////////////////////////////////////////////
1045                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1046                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1047                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1048                 
1049                 vector<trimData*> pDataArray; 
1050                 DWORD   dwThreadIdArray[processors-1];
1051                 HANDLE  hThreadArray[processors-1]; 
1052                 
1053                 //Create processor worker threads.
1054                 for( int h=0; h<processors-1; h++){
1055                         
1056             string extension = "";
1057                         if (h != 0) { extension = toString(h) + ".temp"; processIDS.push_back(h); }
1058             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1059             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1060             vector<vector<string> > tempNameFileNames = nameFileNames;
1061             
1062             if(allFiles){
1063                 ofstream temp;
1064                 
1065                 for(int i=0;i<tempFASTAFileNames.size();i++){
1066                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1067                         if (tempFASTAFileNames[i][j] != "") {
1068                             tempFASTAFileNames[i][j] += extension;
1069                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1070                             
1071                             if(qFileName != ""){
1072                                 tempPrimerQualFileNames[i][j] += extension;
1073                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1074                             }
1075                             if(nameFile != ""){
1076                                 tempNameFileNames[i][j] += extension;
1077                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1078                             }
1079                         }
1080                     }
1081                 }
1082             }
1083
1084             
1085                         trimData* tempTrim = new trimData(filename,
1086                                               qFileName, nameFile, countfile,
1087                                               (trimFASTAFileName+extension),
1088                                               (scrapFASTAFileName+extension),
1089                                               (trimQualFileName+extension),
1090                                               (scrapQualFileName+extension),
1091                                               (trimNameFileName+extension),
1092                                               (scrapNameFileName+extension),
1093                                               (trimCountFileName+extension),
1094                                               (scrapCountFileName+extension),
1095                                               (groupFile+extension),
1096                                               tempFASTAFileNames,
1097                                               tempPrimerQualFileNames,
1098                                               tempNameFileNames,
1099                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
1100                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
1101                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1102                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1103                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
1104                         pDataArray.push_back(tempTrim);
1105             
1106                         hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
1107                 }
1108         
1109         //parent do my part
1110                 ofstream temp;
1111                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1112                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1113                 if(qFileName != ""){
1114                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1115                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1116                 }
1117                 if (nameFile != "") {
1118                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1119                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1120                 }
1121         vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1122         vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1123         vector<vector<string> > tempNameFileNames = nameFileNames;
1124         if(allFiles){
1125             ofstream temp;
1126             string extension = toString(processors-1) + ".temp";
1127             for(int i=0;i<tempFASTAFileNames.size();i++){
1128                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
1129                     if (tempFASTAFileNames[i][j] != "") {
1130                         tempFASTAFileNames[i][j] += extension;
1131                         m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
1132                         
1133                         if(qFileName != ""){
1134                             tempPrimerQualFileNames[i][j] += extension;
1135                             m->openOutputFile(tempPrimerQualFileNames[i][j], temp);             temp.close();
1136                         }
1137                         if(nameFile != ""){
1138                             tempNameFileNames[i][j] += extension;
1139                             m->openOutputFile(tempNameFileNames[i][j], temp);           temp.close();
1140                         }
1141                     }
1142                 }
1143             }
1144         }
1145         
1146                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), tempFASTAFileNames, tempPrimerQualFileNames, tempNameFileNames, lines[processors-1], qLines[processors-1]);
1147         processIDS.push_back(processors-1);
1148
1149         
1150                 //Wait until all threads have terminated.
1151                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1152                 
1153                 //Close all thread handles and free memory allocations.
1154                 for(int i=0; i < pDataArray.size(); i++){
1155             if (pDataArray[i]->count != pDataArray[i]->lineEnd) {
1156                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->lineEnd) + " sequences assigned to it, quitting. \n"); m->control_pressed = true;
1157             }
1158                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1159                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1160                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1161                 else { groupCounts[it->first] += it->second; }
1162             }
1163             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1164                 map<string, string>::iterator it2 = groupMap.find(it->first);
1165                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1166                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1167             }
1168             CloseHandle(hThreadArray[i]);
1169                         delete pDataArray[i];
1170                 }
1171         
1172 #endif          
1173         
1174         
1175         //append files
1176                 for(int i=0;i<processIDS.size();i++){
1177                         
1178                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1179                         
1180                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1181                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1182                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1183                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1184                         
1185                         if(qFileName != ""){
1186                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1187                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1188                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1189                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1190                         }
1191                         
1192                         if(nameFile != ""){
1193                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1194                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1195                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1196                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1197                         }
1198             
1199             if(countfile != ""){
1200                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1201                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1202                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1203                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1204                         }
1205                         
1206                         if((createGroup)&&(countfile == "")){
1207                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1208                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1209                         }
1210                         
1211                         
1212                         if(allFiles){
1213                                 for(int j=0;j<fastaFileNames.size();j++){
1214                                         for(int k=0;k<fastaFileNames[j].size();k++){
1215                                                 if (fastaFileNames[j][k] != "") {
1216                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1217                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1218                                                         
1219                                                         if(qFileName != ""){
1220                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1221                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1222                                                         }
1223                                                         
1224                                                         if(nameFile != ""){
1225                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1226                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1227                                                         }
1228                                                 }
1229                                         }
1230                                 }
1231                         }
1232                         
1233             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1234                         if(createGroup){
1235                                 ifstream in;
1236                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1237                                 m->openInputFile(tempFile, in);
1238                                 int tempNum;
1239                                 string group;
1240                                 
1241                                 in >> tempNum; m->gobble(in);
1242                                 
1243                                 if (tempNum != 0) {
1244                                         for (int i = 0; i < tempNum; i++) { 
1245                         int groupNum;
1246                                                 in >> group >> groupNum; m->gobble(in);
1247                         
1248                                                 map<string, int>::iterator it = groupCounts.find(group);
1249                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1250                                                 else { groupCounts[it->first] += groupNum; }
1251                                         }
1252                                 }
1253                 in >> tempNum; m->gobble(in);
1254                 if (tempNum != 0) {
1255                                         for (int i = 0; i < tempNum; i++) { 
1256                         string group, seqName;
1257                                                 in >> seqName >> group; m->gobble(in);
1258                         
1259                                                 map<string, string>::iterator it = groupMap.find(seqName);
1260                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1261                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1262                                         }
1263                                 }
1264                 
1265                                 in.close(); m->mothurRemove(tempFile);
1266                         }
1267             #endif
1268                 }
1269
1270         return exitCommand;
1271         }
1272         catch(exception& e) {
1273                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1274                 exit(1);
1275         }
1276 }
1277
1278 /**************************************************************************************************/
1279
1280 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1281         try {
1282         
1283         vector<unsigned long long> fastaFilePos;
1284                 vector<unsigned long long> qfileFilePos;
1285                 
1286                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1287                 //set file positions for fasta file
1288                 fastaFilePos = m->divideFile(filename, processors);
1289                 
1290                 //get name of first sequence in each chunk
1291                 map<string, int> firstSeqNames;
1292                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1293                         ifstream in;
1294                         m->openInputFile(filename, in);
1295                         in.seekg(fastaFilePos[i]);
1296                 
1297                         Sequence temp(in); 
1298                         firstSeqNames[temp.getName()] = i;
1299                 
1300                         in.close();
1301                 }
1302                 
1303                 if(qfilename != "")     {
1304             //seach for filePos of each first name in the qfile and save in qfileFilePos
1305             ifstream inQual;
1306             m->openInputFile(qfilename, inQual);
1307             
1308             string input;
1309             while(!inQual.eof()){       
1310                 input = m->getline(inQual);
1311                 
1312                 if (input.length() != 0) {
1313                     if(input[0] == '>'){ //this is a sequence name line
1314                         istringstream nameStream(input);
1315                         
1316                         string sname = "";  nameStream >> sname;
1317                         sname = sname.substr(1);
1318                         
1319                         map<string, int>::iterator it = firstSeqNames.find(sname);
1320                         
1321                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1322                             unsigned long long pos = inQual.tellg(); 
1323                             qfileFilePos.push_back(pos - input.length() - 1);   
1324                             firstSeqNames.erase(it);
1325                         }
1326                     }
1327                 }
1328                 
1329                 if (firstSeqNames.size() == 0) { break; }
1330             }
1331             inQual.close();
1332             
1333             
1334             if (firstSeqNames.size() != 0) { 
1335                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1336                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1337                 }
1338                 qFileName = "";
1339                 return processors;
1340             }
1341             
1342             //get last file position of qfile
1343             FILE * pFile;
1344             unsigned long long size;
1345             
1346             //get num bytes in file
1347             pFile = fopen (qfilename.c_str(),"rb");
1348             if (pFile==NULL) perror ("Error opening file");
1349             else{
1350                 fseek (pFile, 0, SEEK_END);
1351                 size=ftell (pFile);
1352                 fclose (pFile);
1353             }
1354             
1355             qfileFilePos.push_back(size);
1356         }
1357         
1358         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1359             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1360                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1361                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1362                 }       
1363                 if(qfilename == "")     {       qLines = lines; } //files with duds
1364                 
1365                 return processors;
1366                 
1367                 #else
1368             
1369         if (processors == 1) { //save time
1370                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1371                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1372             lines.push_back(linePair(0, 1000));
1373             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1374         }else{
1375             int numFastaSeqs = 0;
1376             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1377             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1378         
1379             if (qfilename != "") { 
1380                 int numQualSeqs = 0;
1381                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1382                 
1383                 if (numFastaSeqs != numQualSeqs) {
1384                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1385                 }
1386             }
1387         
1388             //figure out how many sequences you have to process
1389             int numSeqsPerProcessor = numFastaSeqs / processors;
1390             for (int i = 0; i < processors; i++) {
1391                 int startIndex =  i * numSeqsPerProcessor;
1392                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1393                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1394                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1395             }
1396         }
1397             if(qfilename == "") {       qLines = lines; } //files with duds
1398                         return 1;
1399                 
1400                 #endif
1401         }
1402         catch(exception& e) {
1403                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1404                 exit(1);
1405         }
1406 }
1407
1408 //***************************************************************************************************************
1409
1410 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1411         try {
1412                 ifstream inOligos;
1413                 m->openInputFile(oligoFile, inOligos);
1414                 
1415                 ofstream test;
1416                 
1417                 string type, oligo, group;
1418
1419                 int indexPrimer = 0;
1420                 int indexBarcode = 0;
1421                 
1422                 while(!inOligos.eof()){
1423
1424                         inOligos >> type; 
1425             
1426                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1427             
1428                         if(type[0] == '#'){
1429                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1430                                 m->gobble(inOligos);
1431                         }
1432                         else{
1433                                 m->gobble(inOligos);
1434                                 //make type case insensitive
1435                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1436                                 
1437                                 inOligos >> oligo;
1438                 
1439                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1440                                 
1441                                 for(int i=0;i<oligo.length();i++){
1442                                         oligo[i] = toupper(oligo[i]);
1443                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1444                                 }
1445                                 
1446                                 if(type == "FORWARD"){
1447                                         group = "";
1448                                         
1449                                         // get rest of line in case there is a primer name
1450                                         while (!inOligos.eof()) {       
1451                                                 char c = inOligos.get(); 
1452                                                 if (c == 10 || c == 13){        break;  }
1453                                                 else if (c == 32 || c == 9){;} //space or tab
1454                                                 else {  group += c;  }
1455                                         } 
1456                                         
1457                                         //check for repeat barcodes
1458                                         map<string, int>::iterator itPrime = primers.find(oligo);
1459                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1460                                         
1461                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1462                     
1463                                         primers[oligo]=indexPrimer; indexPrimer++;              
1464                                         primerNameVector.push_back(group);
1465                                 }
1466                                 else if(type == "REVERSE"){
1467                                         //Sequence oligoRC("reverse", oligo);
1468                                         //oligoRC.reverseComplement();
1469                     string oligoRC = reverseOligo(oligo);
1470                                         revPrimer.push_back(oligoRC);
1471                                 }
1472                                 else if(type == "BARCODE"){
1473                                         inOligos >> group;
1474                                         
1475                                         //check for repeat barcodes
1476                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1477                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1478                     
1479                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1480                                         barcodeNameVector.push_back(group);
1481                                 }else if(type == "LINKER"){
1482                                         linker.push_back(oligo);
1483                                 }else if(type == "SPACER"){
1484                                         spacer.push_back(oligo);
1485                                 }
1486                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1487                         }
1488                         m->gobble(inOligos);
1489                 }       
1490                 inOligos.close();
1491                 
1492                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1493                 
1494                 //add in potential combos
1495                 if(barcodeNameVector.size() == 0){
1496                         barcodes[""] = 0;
1497                         barcodeNameVector.push_back("");                        
1498                 }
1499                 
1500                 if(primerNameVector.size() == 0){
1501                         primers[""] = 0;
1502                         primerNameVector.push_back("");                 
1503                 }
1504                 
1505                 fastaFileNames.resize(barcodeNameVector.size());
1506                 for(int i=0;i<fastaFileNames.size();i++){
1507                         fastaFileNames[i].assign(primerNameVector.size(), "");
1508                 }
1509                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1510                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1511                 
1512                 if(allFiles){
1513                         set<string> uniqueNames; //used to cleanup outputFileNames
1514                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1515                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1516                                         
1517                                         string primerName = primerNameVector[itPrimer->second];
1518                                         string barcodeName = barcodeNameVector[itBar->second];
1519                                         
1520                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
1521                                         else {
1522                         string comboGroupName = "";
1523                         string fastaFileName = "";
1524                         string qualFileName = "";
1525                         string nameFileName = "";
1526                         string countFileName = "";
1527                         
1528                         if(primerName == ""){
1529                             comboGroupName = barcodeNameVector[itBar->second];
1530                         }
1531                         else{
1532                             if(barcodeName == ""){
1533                                 comboGroupName = primerNameVector[itPrimer->second];
1534                             }
1535                             else{
1536                                 comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1537                             }
1538                         }
1539                         
1540                         
1541                         ofstream temp;
1542                         map<string, string> variables; 
1543                         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
1544                         variables["[tag]"] = comboGroupName;
1545                         fastaFileName = getOutputFileName("fasta", variables);
1546                         if (uniqueNames.count(fastaFileName) == 0) {
1547                             outputNames.push_back(fastaFileName);
1548                             outputTypes["fasta"].push_back(fastaFileName);
1549                             uniqueNames.insert(fastaFileName);
1550                         }
1551                         
1552                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1553                         m->openOutputFile(fastaFileName, temp);         temp.close();
1554                         
1555                         if(qFileName != ""){
1556                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
1557                             qualFileName = getOutputFileName("qfile", variables);
1558                             if (uniqueNames.count(qualFileName) == 0) {
1559                                 outputNames.push_back(qualFileName);
1560                                 outputTypes["qfile"].push_back(qualFileName);
1561                             }
1562                             
1563                             qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1564                             m->openOutputFile(qualFileName, temp);              temp.close();
1565                         }
1566                         
1567                         if(nameFile != ""){
1568                             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
1569                             nameFileName = getOutputFileName("name", variables);
1570                             if (uniqueNames.count(nameFileName) == 0) {
1571                                 outputNames.push_back(nameFileName);
1572                                 outputTypes["name"].push_back(nameFileName);
1573                             }
1574                             
1575                             nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1576                             m->openOutputFile(nameFileName, temp);              temp.close();
1577                         }
1578                     }
1579                                 }
1580                         }
1581                 }
1582                 numFPrimers = primers.size();
1583                 numRPrimers = revPrimer.size();
1584         numLinkers = linker.size();
1585         numSpacers = spacer.size();
1586                 
1587                 bool allBlank = true;
1588                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1589                         if (barcodeNameVector[i] != "") {
1590                                 allBlank = false;
1591                                 break;
1592                         }
1593                 }
1594                 for (int i = 0; i < primerNameVector.size(); i++) {
1595                         if (primerNameVector[i] != "") {
1596                                 allBlank = false;
1597                                 break;
1598                         }
1599                 }
1600
1601                 if (allBlank) {
1602                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1603                         allFiles = false;
1604                         return false;
1605                 }
1606                 
1607                 return true;
1608                 
1609         }
1610         catch(exception& e) {
1611                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1612                 exit(1);
1613         }
1614 }
1615 //***************************************************************************************************************
1616
1617 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1618         try {
1619                 bool success = 1;
1620                 if(qscores.getName() != ""){
1621                         qscores.trimQScores(-1, keepFirst);
1622                 }
1623                 sequence.trim(keepFirst);
1624                 return success;
1625         }
1626         catch(exception& e) {
1627                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1628                 exit(1);
1629         }
1630         
1631 }       
1632
1633 //***************************************************************************************************************
1634
1635 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1636         try {
1637                 bool success = 0;
1638                 
1639                 int length = sequence.getNumBases() - removeLast;
1640                 
1641                 if(length > 0){
1642                         if(qscores.getName() != ""){
1643                                 qscores.trimQScores(-1, length);
1644                         }
1645                         sequence.trim(length);
1646                         success = 1;
1647                 }
1648                 else{
1649                         success = 0;
1650                 }
1651
1652                 return success;
1653         }
1654         catch(exception& e) {
1655                 m->errorOut(e, "removeLastTrim", "countDiffs");
1656                 exit(1);
1657         }
1658         
1659 }       
1660
1661 //***************************************************************************************************************
1662
1663 bool TrimSeqsCommand::cullLength(Sequence& seq){
1664         try {
1665         
1666                 int length = seq.getNumBases();
1667                 bool success = 0;       //guilty until proven innocent
1668                 
1669                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1670                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1671                 else                                                                                            {       success = 0;    }
1672                 
1673                 return success;
1674         
1675         }
1676         catch(exception& e) {
1677                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1678                 exit(1);
1679         }
1680         
1681 }
1682
1683 //***************************************************************************************************************
1684
1685 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1686         try {
1687                 int longHomoP = seq.getLongHomoPolymer();
1688                 bool success = 0;       //guilty until proven innocent
1689                 
1690                 if(longHomoP <= maxHomoP){      success = 1;    }
1691                 else                                    {       success = 0;    }
1692                 
1693                 return success;
1694         }
1695         catch(exception& e) {
1696                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1697                 exit(1);
1698         }
1699         
1700 }
1701 //********************************************************************/
1702 string TrimSeqsCommand::reverseOligo(string oligo){
1703         try {
1704         string reverse = "";
1705         
1706         for(int i=oligo.length()-1;i>=0;i--){
1707             
1708             if(oligo[i] == 'A')         {       reverse += 'T'; }
1709             else if(oligo[i] == 'T'){   reverse += 'A'; }
1710             else if(oligo[i] == 'U'){   reverse += 'A'; }
1711             
1712             else if(oligo[i] == 'G'){   reverse += 'C'; }
1713             else if(oligo[i] == 'C'){   reverse += 'G'; }
1714             
1715             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1716             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1717             
1718             else if(oligo[i] == 'M'){   reverse += 'K'; }
1719             else if(oligo[i] == 'K'){   reverse += 'M'; }
1720             
1721             else if(oligo[i] == 'W'){   reverse += 'W'; }
1722             else if(oligo[i] == 'S'){   reverse += 'S'; }
1723             
1724             else if(oligo[i] == 'B'){   reverse += 'V'; }
1725             else if(oligo[i] == 'V'){   reverse += 'B'; }
1726             
1727             else if(oligo[i] == 'D'){   reverse += 'H'; }
1728             else if(oligo[i] == 'H'){   reverse += 'D'; }
1729             
1730             else                                                {       reverse += 'N'; }
1731         }
1732         
1733         
1734         return reverse;
1735     }
1736         catch(exception& e) {
1737                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1738                 exit(1);
1739         }
1740 }
1741
1742 //***************************************************************************************************************
1743
1744 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1745         try {
1746                 int numNs = seq.getAmbigBases();
1747                 bool success = 0;       //guilty until proven innocent
1748                 
1749                 if(numNs <= maxAmbig)   {       success = 1;    }
1750                 else                                    {       success = 0;    }
1751                 
1752                 return success;
1753         }
1754         catch(exception& e) {
1755                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1756                 exit(1);
1757         }
1758         
1759 }
1760 //***************************************************************************************************************