]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added count file to trim.seqs, get.groups, get.lineage, get.seqs, heatmap.sim, list...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14
15 //**********************************************************************************************************************
16 vector<string> TrimSeqsCommand::setParameters(){        
17         try {
18                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
21                 CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
23                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
24                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
25                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
26                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
27                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
28                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
29                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
30         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
31                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
32         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
33                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
34                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
35                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
36                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
37                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
38                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
39                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
40                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
41                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
42                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
43                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
44                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
45                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
46                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
47                         
48                 vector<string> myArray;
49                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
50                 return myArray;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string TrimSeqsCommand::getHelpString(){        
59         try {
60                 string helpString = "";
61                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
62                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
63                 helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
64                 helpString += "The fasta parameter is required.\n";
65                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
66                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
67                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
68         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
69                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
70                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
71                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
72                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
73                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
74                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
75                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
76         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
77                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
78                 helpString += "The qfile parameter allows you to provide a quality file.\n";
79                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
80                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
81                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
82                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
83                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
84                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
85                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
86                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
87                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
88                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
89                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
90                 helpString += "The trim.seqs command should be in the following format: \n";
91                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
92                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
93                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
94                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
95                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
96                 return helpString;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 string TrimSeqsCommand::getOutputFileNameTag(string type, string inputName=""){ 
105         try {
106         string outputFileName = "";
107                 map<string, vector<string> >::iterator it;
108         
109         //is this a type this command creates
110         it = outputTypes.find(type);
111         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
112         else {
113             if (type == "qfile")            {   outputFileName =  "qual";   }
114             else if (type == "fasta")            {   outputFileName =  "fasta";   }
115             else if (type == "group")            {   outputFileName =  "groups";   }
116             else if (type == "name")            {   outputFileName =  "names";   }
117             else if (type == "count")            {   outputFileName =  "count.table";   }
118             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
119         }
120         return outputFileName;
121         }
122         catch(exception& e) {
123                 m->errorOut(e, "TrimSeqsCommand", "getOutputFileNameTag");
124                 exit(1);
125         }
126 }
127
128
129 //**********************************************************************************************************************
130
131 TrimSeqsCommand::TrimSeqsCommand(){     
132         try {
133                 abort = true; calledHelp = true; 
134                 setParameters();
135                 vector<string> tempOutNames;
136                 outputTypes["fasta"] = tempOutNames;
137                 outputTypes["qfile"] = tempOutNames;
138                 outputTypes["group"] = tempOutNames;
139                 outputTypes["name"] = tempOutNames;
140         outputTypes["count"] = tempOutNames;
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
144                 exit(1);
145         }
146 }
147 //***************************************************************************************************************
148
149 TrimSeqsCommand::TrimSeqsCommand(string option)  {
150         try {
151                 
152                 abort = false; calledHelp = false;   
153                 comboStarts = 0;
154                 
155                 //allow user to run help
156                 if(option == "help") { help(); abort = true; calledHelp = true; }
157                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
158                 
159                 else {
160                         vector<string> myArray = setParameters();
161                         
162                         OptionParser parser(option);
163                         map<string,string> parameters = parser.getParameters();
164                         
165                         ValidParameters validParameter;
166                         map<string,string>::iterator it;
167                         
168                         //check to make sure all parameters are valid for command
169                         for (it = parameters.begin(); it != parameters.end(); it++) { 
170                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
171                         }
172                         
173                         //initialize outputTypes
174                         vector<string> tempOutNames;
175                         outputTypes["fasta"] = tempOutNames;
176                         outputTypes["qfile"] = tempOutNames;
177                         outputTypes["group"] = tempOutNames;
178                         outputTypes["name"] = tempOutNames;
179             outputTypes["count"] = tempOutNames;
180                         
181                         //if the user changes the input directory command factory will send this info to us in the output parameter 
182                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
183                         if (inputDir == "not found"){   inputDir = "";          }
184                         else {
185                                 string path;
186                                 it = parameters.find("fasta");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
192                                 }
193                                 
194                                 it = parameters.find("oligos");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
200                                 }
201                                 
202                                 it = parameters.find("qfile");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
208                                 }
209                                 
210                                 it = parameters.find("name");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
216                                 }
217                 
218                 it = parameters.find("count");
219                                 //user has given a template file
220                                 if(it != parameters.end()){ 
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
224                                 }
225                                 
226                         }
227
228                         
229                         //check for required parameters
230                         fastaFile = validParameter.validFile(parameters, "fasta", true);
231                         if (fastaFile == "not found") {                                 
232                                 fastaFile = m->getFastaFile(); 
233                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
234                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
235                         }else if (fastaFile == "not open") { abort = true; }    
236                         else { m->setFastaFile(fastaFile); }
237                         
238                         //if the user changes the output directory command factory will send this info to us in the output parameter 
239                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
240                                 outputDir = ""; 
241                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
242                         }
243                 
244                         
245                         //check for optional parameter and set defaults
246                         // ...at some point should added some additional type checking...
247                         string temp;
248                         temp = validParameter.validFile(parameters, "flip", false);
249                         if (temp == "not found")    {   flip = 0;       }
250                         else {  flip = m->isTrue(temp);         }
251                 
252                         temp = validParameter.validFile(parameters, "oligos", true);
253                         if (temp == "not found"){       oligoFile = "";         }
254                         else if(temp == "not open"){    abort = true;   } 
255                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
256                         
257                         
258                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
259                         m->mothurConvert(temp, maxAmbig);  
260
261                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
262                         m->mothurConvert(temp, maxHomoP);  
263
264                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
265                         m->mothurConvert(temp, minLength); 
266                         
267                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
268                         m->mothurConvert(temp, maxLength);
269                         
270                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
271                         m->mothurConvert(temp, bdiffs);
272                         
273                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
274                         m->mothurConvert(temp, pdiffs);
275             
276             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
277                         m->mothurConvert(temp, ldiffs);
278             
279             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
280                         m->mothurConvert(temp, sdiffs);
281                         
282                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
283                         m->mothurConvert(temp, tdiffs);
284                         
285                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
286                         
287                         temp = validParameter.validFile(parameters, "qfile", true);     
288                         if (temp == "not found")        {       qFileName = "";         }
289                         else if(temp == "not open")     {       abort = true;           }
290                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
291                         
292                         temp = validParameter.validFile(parameters, "name", true);      
293                         if (temp == "not found")        {       nameFile = "";          }
294                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
295                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
296             
297             countfile = validParameter.validFile(parameters, "count", true);
298                         if (countfile == "not open") { abort = true; countfile = ""; }  
299                         else if (countfile == "not found") { countfile = ""; }
300                         else { m->setCountTableFile(countfile); }
301                         
302             if ((countfile != "") && (nameFile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
303                         
304                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
305                         m->mothurConvert(temp, qThreshold);
306                         
307                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
308                         qtrim = m->isTrue(temp);
309
310                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
311                         convert(temp, qRollAverage);
312
313                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
314                         convert(temp, qWindowAverage);
315
316                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
317                         convert(temp, qWindowSize);
318
319                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
320                         convert(temp, qWindowStep);
321
322                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
323                         convert(temp, qAverage);
324
325                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
326                         convert(temp, keepFirst);
327
328                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
329                         convert(temp, removeLast);
330                         
331                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
332                         allFiles = m->isTrue(temp);
333             
334             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
335                         keepforward = m->isTrue(temp);
336                         
337                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
338                         m->setProcessors(temp);
339                         m->mothurConvert(temp, processors); 
340                         
341                         
342                         if(allFiles && (oligoFile == "")){
343                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
344                         }
345                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
346                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
347                                 qAverage=0;
348                                 qThreshold=0;
349                         }
350                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
351                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
352                                 abort = true;
353                         }
354                         
355             if (countfile == "") {
356                 if (nameFile == "") {
357                     vector<string> files; files.push_back(fastaFile);
358                     parser.getNameFile(files);
359                 }
360             }
361                 }
362
363         }
364         catch(exception& e) {
365                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
366                 exit(1);
367         }
368 }
369 //***************************************************************************************************************
370
371 int TrimSeqsCommand::execute(){
372         try{
373         
374                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
375                 
376                 numFPrimers = 0;  //this needs to be initialized
377                 numRPrimers = 0;
378         numSpacers = 0;
379         numLinkers = 0;
380                 createGroup = false;
381                 vector<vector<string> > fastaFileNames;
382                 vector<vector<string> > qualFileNames;
383                 vector<vector<string> > nameFileNames;
384                 
385                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("fasta");
386                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
387                 
388                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("fasta");
389                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
390                 
391                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim." + getOutputFileNameTag("qfile");
392                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap." + getOutputFileNameTag("qfile");
393                 
394                 if (qFileName != "") {
395                         outputNames.push_back(trimQualFile);
396                         outputNames.push_back(scrapQualFile);
397                         outputTypes["qfile"].push_back(trimQualFile);
398                         outputTypes["qfile"].push_back(scrapQualFile); 
399                 }
400                 
401                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim." + getOutputFileNameTag("name");
402                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap." + getOutputFileNameTag("name");
403                 
404                 if (nameFile != "") {
405                         m->readNames(nameFile, nameMap);
406                         outputNames.push_back(trimNameFile);
407                         outputNames.push_back(scrapNameFile);
408                         outputTypes["name"].push_back(trimNameFile);
409                         outputTypes["name"].push_back(scrapNameFile); 
410                 }
411         
412         string trimCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "trim." + getOutputFileNameTag("count");
413                 string scrapCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + "scrap." + getOutputFileNameTag("count");
414                 
415                 if (countfile != "") {
416             CountTable ct;
417             ct.readTable(countfile);
418             nameCount = ct.getNameMap();
419                         outputNames.push_back(trimCountFile);
420                         outputNames.push_back(scrapCountFile);
421                         outputTypes["count"].push_back(trimCountFile);
422                         outputTypes["count"].push_back(scrapCountFile); 
423                 }
424
425                 
426                 if (m->control_pressed) { return 0; }
427                 
428                 string outputGroupFileName;
429                 if(oligoFile != ""){
430                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
431                         if ((createGroup) && (countfile == "")){
432                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + getOutputFileNameTag("group");
433                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
434                         }
435                 }
436        
437         //fills lines and qlines
438                 setLines(fastaFile, qFileName);
439                 
440         if(processors == 1){
441             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
442         }else{
443             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, trimCountFile, scrapCountFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
444         }       
445                 
446                 
447                 if (m->control_pressed) {  return 0; }                  
448         
449                 if(allFiles){
450                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
451                         map<string, string>::iterator it;
452                         set<string> namesToRemove;
453                         for(int i=0;i<fastaFileNames.size();i++){
454                                 for(int j=0;j<fastaFileNames[0].size();j++){
455                                         if (fastaFileNames[i][j] != "") {
456                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
457                                                         if(m->isBlank(fastaFileNames[i][j])){
458                                                                 m->mothurRemove(fastaFileNames[i][j]);
459                                                                 namesToRemove.insert(fastaFileNames[i][j]);
460                                                         
461                                                                 if(qFileName != ""){
462                                                                         m->mothurRemove(qualFileNames[i][j]);
463                                                                         namesToRemove.insert(qualFileNames[i][j]);
464                                                                 }
465                                                                 
466                                                                 if(nameFile != ""){
467                                                                         m->mothurRemove(nameFileNames[i][j]);
468                                                                         namesToRemove.insert(nameFileNames[i][j]);
469                                                                 }
470                                                         }else{  
471                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
472                                                                 if (it == uniqueFastaNames.end()) {     
473                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
474                                                                 }       
475                                                         }
476                                                 }
477                                         }
478                                 }
479                         }
480                         
481                         //remove names for outputFileNames, just cleans up the output
482                         vector<string> outputNames2;
483                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
484                         outputNames = outputNames2;
485                         
486             for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
487                 ifstream in;
488                 m->openInputFile(it->first, in);
489                 
490                 ofstream out;
491                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first));
492                 if (countfile == "") { thisGroupName += getOutputFileNameTag("group"); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName); }
493                 else {  thisGroupName += getOutputFileNameTag("count"); outputNames.push_back(thisGroupName); outputTypes["count"].push_back(thisGroupName);  }
494                 m->openOutputFile(thisGroupName, out);
495                 
496                 if (countfile != "") {  out << "Representative_Sequence\ttotal\t" << it->second << endl;  }
497                 
498                 while (!in.eof()){
499                     if (m->control_pressed) { break; }
500                     
501                     Sequence currSeq(in); m->gobble(in);
502                     if (countfile == "") {  
503                         out << currSeq.getName() << '\t' << it->second << endl;  
504                         
505                         if (nameFile != "") {
506                             map<string, string>::iterator itName = nameMap.find(currSeq.getName());
507                             if (itName != nameMap.end()) { 
508                                 vector<string> thisSeqsNames; 
509                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
510                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
511                                     out << thisSeqsNames[k] << '\t' << it->second << endl;
512                                 }
513                             }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                       
514                         }
515                     }else { 
516                         map<string, int>::iterator itTotalReps = nameCount.find(currSeq.getName());
517                         if (itTotalReps != nameCount.end()) { out << currSeq.getName() << '\t' << itTotalReps->second << '\t' << itTotalReps->second << endl; }
518                         else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
519                     }
520                 }
521                 in.close();
522                 out.close();
523             }
524             
525             if (countfile != "") { //create countfile with group info included
526                 CountTable* ct = new CountTable();
527                 ct->readTable(trimCountFile);
528                 map<string, int> justTrimmedNames = ct->getNameMap();
529                 delete ct;
530                 
531                 CountTable newCt;
532                 for (map<string, int>::iterator itCount = groupCounts.begin(); itCount != groupCounts.end(); itCount++) { newCt.addGroup(itCount->first); }
533                 vector<int> tempCounts; tempCounts.resize(groupCounts.size(), 0);
534                 for (map<string, int>::iterator itNames = justTrimmedNames.begin(); itNames != justTrimmedNames.end(); itNames++) {
535                     newCt.push_back(itNames->first, tempCounts); //add it to the table with no abundance so we can set the groups abundance
536                     map<string, string>::iterator it2 = groupMap.find(itNames->first);
537                     if (it2 != groupMap.end()) { newCt.setAbund(itNames->first, it2->second, itNames->second); }
538                     else { m->mothurOut("[ERROR]: missing group info for " + itNames->first + "."); m->mothurOutEndLine(); m->control_pressed = true; }
539                 }
540                 newCt.printTable(trimCountFile);
541             }
542                 }
543                 
544                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
545
546                 //output group counts
547                 m->mothurOutEndLine();
548                 int total = 0;
549                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
550                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
551                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
552                 }
553                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
554                 
555                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
556
557                 //set fasta file as new current fastafile
558                 string current = "";
559                 itTypes = outputTypes.find("fasta");
560                 if (itTypes != outputTypes.end()) {
561                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
562                 }
563                 
564                 itTypes = outputTypes.find("name");
565                 if (itTypes != outputTypes.end()) {
566                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
567                 }
568                 
569                 itTypes = outputTypes.find("qfile");
570                 if (itTypes != outputTypes.end()) {
571                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
572                 }
573                 
574                 itTypes = outputTypes.find("group");
575                 if (itTypes != outputTypes.end()) {
576                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
577                 }
578         
579         itTypes = outputTypes.find("count");
580                 if (itTypes != outputTypes.end()) {
581                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
582                 }
583
584                 m->mothurOutEndLine();
585                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
586                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
587                 m->mothurOutEndLine();
588                 
589                 return 0;       
590                         
591         }
592         catch(exception& e) {
593                 m->errorOut(e, "TrimSeqsCommand", "execute");
594                 exit(1);
595         }
596 }
597                 
598 /**************************************************************************************/
599 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string trimCFileName, string scrapCFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) { 
600                 
601         try {
602                 
603                 ofstream trimFASTAFile;
604                 m->openOutputFile(trimFileName, trimFASTAFile);
605                 
606                 ofstream scrapFASTAFile;
607                 m->openOutputFile(scrapFileName, scrapFASTAFile);
608                 
609                 ofstream trimQualFile;
610                 ofstream scrapQualFile;
611                 if(qFileName != ""){
612                         m->openOutputFile(trimQFileName, trimQualFile);
613                         m->openOutputFile(scrapQFileName, scrapQualFile);
614                 }
615                 
616                 ofstream trimNameFile;
617                 ofstream scrapNameFile;
618                 if(nameFile != ""){
619                         m->openOutputFile(trimNFileName, trimNameFile);
620                         m->openOutputFile(scrapNFileName, scrapNameFile);
621                 }
622                 
623         ofstream trimCountFile;
624                 ofstream scrapCountFile;
625                 if(countfile != ""){
626                         m->openOutputFile(trimCFileName, trimCountFile);
627                         m->openOutputFile(scrapCFileName, scrapCountFile);
628             if (line.start == 0) { trimCountFile << "Representative_Sequence\ttotal" << endl; scrapCountFile << "Representative_Sequence\ttotal" << endl; }
629                 }
630                 
631                 ofstream outGroupsFile;
632                 if ((createGroup) && (countfile == "")){        m->openOutputFile(groupFileName, outGroupsFile);   }
633                 if(allFiles){
634                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
635                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
636                                         if (fastaFileNames[i][j] != "") {
637                                                 ofstream temp;
638                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
639                                                 if(qFileName != ""){
640                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
641                                                 }
642                                                 
643                                                 if(nameFile != ""){
644                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
645                                                 }
646                                         }
647                                 }
648                         }
649                 }
650                 
651                 ifstream inFASTA;
652                 m->openInputFile(filename, inFASTA);
653                 inFASTA.seekg(line.start);
654                 
655                 ifstream qFile;
656                 if(qFileName != "")     {
657                         m->openInputFile(qFileName, qFile);
658                         qFile.seekg(qline.start);  
659                 }
660                 
661                 int count = 0;
662                 bool moreSeqs = 1;
663                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer);
664         
665                 while (moreSeqs) {
666                                 
667                         if (m->control_pressed) { 
668                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
669                                 if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
670                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
671                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
672                 if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
673                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;
674                         }
675                         
676                         int success = 1;
677                         string trashCode = "";
678                         int currentSeqsDiffs = 0;
679
680                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
681                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
682             
683                         QualityScores currQual;
684                         if(qFileName != ""){
685                                 currQual = QualityScores(qFile);  m->gobble(qFile);
686                 //cout << currQual.getName() << endl;
687                         }
688                         
689                         string origSeq = currSeq.getUnaligned();
690                         if (origSeq != "") {
691                                 
692                                 int barcodeIndex = 0;
693                                 int primerIndex = 0;
694                                 
695                 if(numLinkers != 0){
696                                         success = trimOligos.stripLinker(currSeq, currQual);
697                                         if(success > ldiffs)            {       trashCode += 'k';       }
698                                         else{ currentSeqsDiffs += success;  }
699
700                                 }
701                 
702                                 if(barcodes.size() != 0){
703                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
704                                         if(success > bdiffs)            {       trashCode += 'b';       }
705                                         else{ currentSeqsDiffs += success;  }
706                                 }
707                 
708                 if(rbarcodes.size() != 0){
709                                         success = trimOligos.stripRBarcode(currSeq, currQual, barcodeIndex);
710                                         if(success > bdiffs)            {       trashCode += 'b';       }
711                                         else{ currentSeqsDiffs += success;  }
712                                 }
713                                 
714                 if(numSpacers != 0){
715                                         success = trimOligos.stripSpacer(currSeq, currQual);
716                                         if(success > sdiffs)            {       trashCode += 's';       }
717                                         else{ currentSeqsDiffs += success;  }
718
719                                 }
720                 
721                                 if(numFPrimers != 0){
722                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
723                                         if(success > pdiffs)            {       trashCode += 'f';       }
724                                         else{ currentSeqsDiffs += success;  }
725                                 }
726                                 
727                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
728                                 
729                                 if(numRPrimers != 0){
730                                         success = trimOligos.stripReverse(currSeq, currQual);
731                                         if(!success)                            {       trashCode += 'r';       }
732                                 }
733
734                                 if(keepFirst != 0){
735                                         success = keepFirstTrim(currSeq, currQual);
736                                 }
737                                 
738                                 if(removeLast != 0){
739                                         success = removeLastTrim(currSeq, currQual);
740                                         if(!success)                            {       trashCode += 'l';       }
741                                 }
742
743                                 
744                                 if(qFileName != ""){
745                                         int origLength = currSeq.getNumBases();
746                                         
747                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
748                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
749                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
750                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
751                                         else                                            {       success = 1;                            }
752                                         
753                                         //you don't want to trim, if it fails above then scrap it
754                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
755                                         
756                                         if(!success)                            {       trashCode += 'q';       }
757                                 }                               
758                 
759                                 if(minLength > 0 || maxLength > 0){
760                                         success = cullLength(currSeq);
761                                         if(!success)                            {       trashCode += 'l';       }
762                                 }
763                                 if(maxHomoP > 0){
764                                         success = cullHomoP(currSeq);
765                                         if(!success)                            {       trashCode += 'h';       }
766                                 }
767                                 if(maxAmbig != -1){
768                                         success = cullAmbigs(currSeq);
769                                         if(!success)                            {       trashCode += 'n';       }
770                                 }
771                                 
772                                 if(flip){               // should go last                       
773                                         currSeq.reverseComplement();
774                                         if(qFileName != ""){
775                                                 currQual.flipQScores(); 
776                                         }
777                                 }
778                                 
779                 if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + ", trashcode= " + trashCode); if (trashCode.length() != 0) { m->mothurOutEndLine(); } }
780                 
781                                 if(trashCode.length() == 0){
782                                         currSeq.setAligned(currSeq.getUnaligned());
783                                         currSeq.printSequence(trimFASTAFile);
784                                         
785                                         if(qFileName != ""){
786                                                 currQual.printQScores(trimQualFile);
787                                         }
788                                         
789                     
790                                         if(nameFile != ""){
791                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
792                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
793                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
794                                         }
795                     
796                     int numRedundants = 0;
797                     if (countfile != "") {
798                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
799                         if (itCount != nameCount.end()) { 
800                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
801                             numRedundants = itCount->second-1;
802                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
803                     }
804                                         
805                                         if (createGroup) {
806                                                 if(barcodes.size() != 0){
807                                                         string thisGroup = barcodeNameVector[barcodeIndex];
808                                                         if (primers.size() != 0) { 
809                                                                 if (primerNameVector[primerIndex] != "") { 
810                                                                         if(thisGroup != "") {
811                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
812                                                                         }else {
813                                                                                 thisGroup = primerNameVector[primerIndex]; 
814                                                                         }
815                                                                 } 
816                                                         }
817                                                         
818                             if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
819                             
820                                                         if (countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
821                             else {   groupMap[currSeq.getName()] = thisGroup; }
822                                                         
823                                                         if (nameFile != "") {
824                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
825                                                                 if (itName != nameMap.end()) { 
826                                                                         vector<string> thisSeqsNames; 
827                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
828                                     numRedundants = thisSeqsNames.size()-1; //we already include ourselves below
829                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
830                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
831                                                                         }
832                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
833                                                         }
834                                                         
835                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
836                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1 + numRedundants; }
837                                                         else { groupCounts[it->first] += (1 + numRedundants); }
838                                                                 
839                                                 }
840                                         }
841                                         
842                                         if(allFiles){
843                                                 ofstream output;
844                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
845                                                 currSeq.printSequence(output);
846                                                 output.close();
847                                                 
848                                                 if(qFileName != ""){
849                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
850                                                         currQual.printQScores(output);
851                                                         output.close();                                                 
852                                                 }
853                                                 
854                                                 if(nameFile != ""){
855                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
856                                                         if (itName != nameMap.end()) { 
857                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
858                                                                 output << itName->first << '\t' << itName->second << endl; 
859                                                                 output.close();
860                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
861                                                 }
862                                         }
863                                 }
864                                 else{
865                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
866                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
867                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
868                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
869                                         }
870                     if (countfile != "") {
871                         map<string, int>::iterator itCount = nameCount.find(currSeq.getName());
872                         if (itCount != nameCount.end()) { 
873                             trimCountFile << itCount->first << '\t' << itCount->second << endl;
874                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your count file, please correct."); m->mothurOutEndLine(); }
875                     }
876                     
877                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
878                                         currSeq.setUnaligned(origSeq);
879                                         currSeq.setAligned(origSeq);
880                                         currSeq.printSequence(scrapFASTAFile);
881                                         if(qFileName != ""){
882                                                 currQual.printQScores(scrapQualFile);
883                                         }
884                                 }
885                                 count++;
886                         }
887                         
888                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
889                                 unsigned long long pos = inFASTA.tellg();
890                                 if ((pos == -1) || (pos >= line.end)) { break; }
891                         
892                         #else
893                                 if (inFASTA.eof()) { break; }
894                         #endif
895                         
896                         //report progress
897                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
898                         
899                 }
900                 //report progress
901                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
902                 
903                 
904                 inFASTA.close();
905                 trimFASTAFile.close();
906                 scrapFASTAFile.close();
907                 if (createGroup) {       outGroupsFile.close();   }
908                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
909                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
910         if(countfile != "")     {       scrapCountFile.close(); trimCountFile.close();  }
911                 
912                 return count;
913         }
914         catch(exception& e) {
915                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
916                 exit(1);
917         }
918 }
919
920 /**************************************************************************************************/
921
922 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string trimCountFileName, string scrapCountFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
923         try {
924         
925         int process = 1;
926                 int exitCommand = 1;
927                 processIDS.clear();
928                 
929 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
930                                 //loop through and create all the processes you want
931                 while (process != processors) {
932                         int pid = fork();
933                         
934                         if (pid > 0) {
935                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
936                                 process++;
937                         }else if (pid == 0){
938                                 
939                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
940                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
941                                 vector<vector<string> > tempNameFileNames = nameFileNames;
942
943                                 if(allFiles){
944                                         ofstream temp;
945
946                                         for(int i=0;i<tempFASTAFileNames.size();i++){
947                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
948                                                         if (tempFASTAFileNames[i][j] != "") {
949                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
950                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
951
952                                                                 if(qFileName != ""){
953                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
954                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
955                                                                 }
956                                                                 if(nameFile != ""){
957                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
958                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
959                                                                 }
960                                                         }
961                                                 }
962                                         }
963                                 }
964                                                         
965                                 driverCreateTrim(filename,
966                                                                  qFileName,
967                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
968                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
969                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
970                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
971                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
972                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
973                                  (trimCountFileName + toString(getpid()) + ".temp"),
974                                                                  (scrapCountFileName + toString(getpid()) + ".temp"),
975                                                                  (groupFile + toString(getpid()) + ".temp"),
976                                                                  tempFASTAFileNames,
977                                                                  tempPrimerQualFileNames,
978                                                                  tempNameFileNames,
979                                                                  lines[process],
980                                                                  qLines[process]);
981                 
982                 if (m->debug) { m->mothurOut("[DEBUG]: " + toString(lines[process].start) + '\t' + toString(qLines[process].start) + '\t' + toString(getpid()) + '\n'); }
983                                 
984                                 //pass groupCounts to parent
985                                 if(createGroup){
986                                         ofstream out;
987                                         string tempFile = filename + toString(getpid()) + ".num.temp";
988                                         m->openOutputFile(tempFile, out);
989                                         
990                                         out << groupCounts.size() << endl;
991                                         
992                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
993                                                 out << it->first << '\t' << it->second << endl;
994                                         }
995                     
996                     out << groupMap.size() << endl;
997                     for (map<string, string>::iterator it = groupMap.begin(); it != groupMap.end(); it++) {
998                                                 out << it->first << '\t' << it->second << endl;
999                                         }
1000                                         out.close();
1001                                 }
1002                                 exit(0);
1003                         }else { 
1004                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1005                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1006                                 exit(0);
1007                         }
1008                 }
1009                 
1010                 //parent do my part
1011                 ofstream temp;
1012                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1013                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1014                 if(qFileName != ""){
1015                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1016                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1017                 }
1018                 if (nameFile != "") {
1019                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1020                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1021                 }
1022         if (countfile != "") {
1023                         m->openOutputFile(trimCountFileName, temp);             temp.close();
1024                         m->openOutputFile(scrapCountFileName, temp);            temp.close();
1025                 }
1026
1027                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, trimCountFileName, scrapCountFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
1028                 
1029                 //force parent to wait until all the processes are done
1030                 for (int i=0;i<processIDS.size();i++) { 
1031                         int temp = processIDS[i];
1032                         wait(&temp);
1033                 }
1034 #else
1035         //////////////////////////////////////////////////////////////////////////////////////////////////////
1036                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
1037                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1038                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1039                 
1040                 vector<trimData*> pDataArray; 
1041                 DWORD   dwThreadIdArray[processors-1];
1042                 HANDLE  hThreadArray[processors-1]; 
1043                 
1044                 //Create processor worker threads.
1045                 for( int i=0; i<processors-1; i++){
1046                         
1047             string extension = "";
1048                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
1049             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
1050             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
1051             vector<vector<string> > tempNameFileNames = nameFileNames;
1052             
1053             if(allFiles){
1054                 ofstream temp;
1055                 
1056                 for(int i=0;i<tempFASTAFileNames.size();i++){
1057                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
1058                         if (tempFASTAFileNames[i][j] != "") {
1059                             tempFASTAFileNames[i][j] += extension;
1060                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
1061                             
1062                             if(qFileName != ""){
1063                                 tempPrimerQualFileNames[i][j] += extension;
1064                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
1065                             }
1066                             if(nameFile != ""){
1067                                 tempNameFileNames[i][j] += extension;
1068                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
1069                             }
1070                         }
1071                     }
1072                 }
1073             }
1074
1075             
1076                         trimData* tempTrim = new trimData(filename,
1077                                               qFileName, nameFile, countfile,
1078                                               (trimFASTAFileName+extension),
1079                                               (scrapFASTAFileName+extension),
1080                                               (trimQualFileName+extension),
1081                                               (scrapQualFileName+extension),
1082                                               (trimNameFileName+extension),
1083                                               (scrapNameFileName+extension),
1084                                               (trimCountFileName+extension),
1085                                               (scrapCountFileName+extension),
1086                                               (groupFile+extension),
1087                                               tempFASTAFileNames,
1088                                               tempPrimerQualFileNames,
1089                                               tempNameFileNames,
1090                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
1091                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, rbarcodes, revPrimer, linker, spacer, 
1092                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
1093                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
1094                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
1095                         pDataArray.push_back(tempTrim);
1096             
1097                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1098                 }
1099         
1100         //parent do my part
1101                 ofstream temp;
1102                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
1103                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
1104                 if(qFileName != ""){
1105                         m->openOutputFile(trimQualFileName, temp);              temp.close();
1106                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
1107                 }
1108                 if (nameFile != "") {
1109                         m->openOutputFile(trimNameFileName, temp);              temp.close();
1110                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
1111                 }
1112         
1113                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (trimCountFileName + toString(processors-1) + ".temp"), (scrapCountFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
1114         processIDS.push_back(processors-1);
1115
1116         
1117                 //Wait until all threads have terminated.
1118                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1119                 
1120                 //Close all thread handles and free memory allocations.
1121                 for(int i=0; i < pDataArray.size(); i++){
1122                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
1123                 map<string, int>::iterator it2 = groupCounts.find(it->first);
1124                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
1125                 else { groupCounts[it->first] += it->second; }
1126             }
1127             for (map<string, string>::iterator it = pDataArray[i]->groupMap.begin(); it != pDataArray[i]->groupMap.end(); it++) {
1128                 map<string, string>::iterator it2 = groupMap.find(it->first);
1129                 if (it2 == groupMap.end()) {    groupMap[it->first] = it->second; }
1130                 else { m->mothurOut("[ERROR]: " + it->first + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1131             }
1132             CloseHandle(hThreadArray[i]);
1133                         delete pDataArray[i];
1134                 }
1135         
1136 #endif          
1137         
1138         
1139         //append files
1140                 for(int i=0;i<processIDS.size();i++){
1141                         
1142                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
1143                         
1144                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
1145                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
1146                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
1147                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
1148                         
1149                         if(qFileName != ""){
1150                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
1151                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
1152                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
1153                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
1154                         }
1155                         
1156                         if(nameFile != ""){
1157                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
1158                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
1159                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1160                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1161                         }
1162             
1163             if(countfile != ""){
1164                                 m->appendFiles((trimCountFileName + toString(processIDS[i]) + ".temp"), trimCountFileName);
1165                                 m->mothurRemove((trimCountFileName + toString(processIDS[i]) + ".temp"));
1166                                 m->appendFiles((scrapCountFileName + toString(processIDS[i]) + ".temp"), scrapCountFileName);
1167                                 m->mothurRemove((scrapCountFileName + toString(processIDS[i]) + ".temp"));
1168                         }
1169                         
1170                         if((createGroup)&&(countfile == "")){
1171                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1172                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1173                         }
1174                         
1175                         
1176                         if(allFiles){
1177                                 for(int j=0;j<fastaFileNames.size();j++){
1178                                         for(int k=0;k<fastaFileNames[j].size();k++){
1179                                                 if (fastaFileNames[j][k] != "") {
1180                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1181                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1182                                                         
1183                                                         if(qFileName != ""){
1184                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1185                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1186                                                         }
1187                                                         
1188                                                         if(nameFile != ""){
1189                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1190                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1191                                                         }
1192                                                 }
1193                                         }
1194                                 }
1195                         }
1196                         
1197             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1198                         if(createGroup){
1199                                 ifstream in;
1200                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1201                                 m->openInputFile(tempFile, in);
1202                                 int tempNum;
1203                                 string group;
1204                                 
1205                                 in >> tempNum; m->gobble(in);
1206                                 
1207                                 if (tempNum != 0) {
1208                                         for (int i = 0; i < tempNum; i++) { 
1209                         int groupNum;
1210                                                 in >> group >> groupNum; m->gobble(in);
1211                         
1212                                                 map<string, int>::iterator it = groupCounts.find(group);
1213                                                 if (it == groupCounts.end()) {  groupCounts[group] = groupNum; }
1214                                                 else { groupCounts[it->first] += groupNum; }
1215                                         }
1216                                 }
1217                 in >> tempNum; m->gobble(in);
1218                 if (tempNum != 0) {
1219                                         for (int i = 0; i < tempNum; i++) { 
1220                         string group, seqName;
1221                                                 in >> seqName >> group; m->gobble(in);
1222                         
1223                                                 map<string, string>::iterator it = groupMap.find(seqName);
1224                                                 if (it == groupMap.end()) {     groupMap[seqName] = group; }
1225                                                 else { m->mothurOut("[ERROR]: " + seqName + " is in your fasta file more than once. Sequence names must be unique. please correct.\n");  }
1226                                         }
1227                                 }
1228                 
1229                                 in.close(); m->mothurRemove(tempFile);
1230                         }
1231             #endif
1232                 }
1233
1234         return exitCommand;
1235         }
1236         catch(exception& e) {
1237                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1238                 exit(1);
1239         }
1240 }
1241
1242 /**************************************************************************************************/
1243
1244 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1245         try {
1246         
1247         vector<unsigned long long> fastaFilePos;
1248                 vector<unsigned long long> qfileFilePos;
1249                 
1250                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1251                 //set file positions for fasta file
1252                 fastaFilePos = m->divideFile(filename, processors);
1253                 
1254                 //get name of first sequence in each chunk
1255                 map<string, int> firstSeqNames;
1256                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1257                         ifstream in;
1258                         m->openInputFile(filename, in);
1259                         in.seekg(fastaFilePos[i]);
1260                 
1261                         Sequence temp(in); 
1262                         firstSeqNames[temp.getName()] = i;
1263                 
1264                         in.close();
1265                 }
1266                 
1267                 if(qfilename != "")     {
1268             //seach for filePos of each first name in the qfile and save in qfileFilePos
1269             ifstream inQual;
1270             m->openInputFile(qfilename, inQual);
1271             
1272             string input;
1273             while(!inQual.eof()){       
1274                 input = m->getline(inQual);
1275                 
1276                 if (input.length() != 0) {
1277                     if(input[0] == '>'){ //this is a sequence name line
1278                         istringstream nameStream(input);
1279                         
1280                         string sname = "";  nameStream >> sname;
1281                         sname = sname.substr(1);
1282                         
1283                         map<string, int>::iterator it = firstSeqNames.find(sname);
1284                         
1285                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1286                             unsigned long long pos = inQual.tellg(); 
1287                             qfileFilePos.push_back(pos - input.length() - 1);   
1288                             firstSeqNames.erase(it);
1289                         }
1290                     }
1291                 }
1292                 
1293                 if (firstSeqNames.size() == 0) { break; }
1294             }
1295             inQual.close();
1296             
1297             
1298             if (firstSeqNames.size() != 0) { 
1299                 for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1300                     m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1301                 }
1302                 qFileName = "";
1303                 return processors;
1304             }
1305             
1306             //get last file position of qfile
1307             FILE * pFile;
1308             unsigned long long size;
1309             
1310             //get num bytes in file
1311             pFile = fopen (qfilename.c_str(),"rb");
1312             if (pFile==NULL) perror ("Error opening file");
1313             else{
1314                 fseek (pFile, 0, SEEK_END);
1315                 size=ftell (pFile);
1316                 fclose (pFile);
1317             }
1318             
1319             qfileFilePos.push_back(size);
1320         }
1321         
1322         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1323             if (m->debug) { m->mothurOut("[DEBUG]: " + toString(i) +'\t' + toString(fastaFilePos[i]) + '\t' + toString(fastaFilePos[i+1]) + '\n'); }
1324                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1325                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1326                 }       
1327                 if(qfilename == "")     {       qLines = lines; } //files with duds
1328                 
1329                 return processors;
1330                 
1331                 #else
1332             
1333         if (processors == 1) { //save time
1334                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1335                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1336             lines.push_back(linePair(0, 1000));
1337             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1338         }else{
1339             int numFastaSeqs = 0;
1340             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1341             if (fastaFilePos.size() < processors) { processors = fastaFilePos.size(); }
1342         
1343             if (qfilename != "") { 
1344                 int numQualSeqs = 0;
1345                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1346                 
1347                 if (numFastaSeqs != numQualSeqs) {
1348                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1349                 }
1350             }
1351         
1352             //figure out how many sequences you have to process
1353             int numSeqsPerProcessor = numFastaSeqs / processors;
1354             for (int i = 0; i < processors; i++) {
1355                 int startIndex =  i * numSeqsPerProcessor;
1356                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1357                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1358                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1359             }
1360         }
1361             if(qfilename == "") {       qLines = lines; } //files with duds
1362                         return 1;
1363                 
1364                 #endif
1365         }
1366         catch(exception& e) {
1367                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1368                 exit(1);
1369         }
1370 }
1371
1372 //***************************************************************************************************************
1373
1374 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1375         try {
1376                 ifstream inOligos;
1377                 m->openInputFile(oligoFile, inOligos);
1378                 
1379                 ofstream test;
1380                 
1381                 string type, oligo, group;
1382
1383                 int indexPrimer = 0;
1384                 int indexBarcode = 0;
1385                 
1386                 while(!inOligos.eof()){
1387
1388                         inOligos >> type; 
1389             
1390                         if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
1391             
1392                         if(type[0] == '#'){
1393                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1394                                 m->gobble(inOligos);
1395                         }
1396                         else{
1397                                 m->gobble(inOligos);
1398                                 //make type case insensitive
1399                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1400                                 
1401                                 inOligos >> oligo;
1402                 
1403                 if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
1404                                 
1405                                 for(int i=0;i<oligo.length();i++){
1406                                         oligo[i] = toupper(oligo[i]);
1407                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1408                                 }
1409                                 
1410                                 if(type == "FORWARD"){
1411                                         group = "";
1412                                         
1413                                         // get rest of line in case there is a primer name
1414                                         while (!inOligos.eof()) {       
1415                                                 char c = inOligos.get(); 
1416                                                 if (c == 10 || c == 13){        break;  }
1417                                                 else if (c == 32 || c == 9){;} //space or tab
1418                                                 else {  group += c;  }
1419                                         } 
1420                                         
1421                                         //check for repeat barcodes
1422                                         map<string, int>::iterator itPrime = primers.find(oligo);
1423                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1424                                         
1425                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
1426                     
1427                                         primers[oligo]=indexPrimer; indexPrimer++;              
1428                                         primerNameVector.push_back(group);
1429                                 }
1430                                 else if(type == "REVERSE"){
1431                                         //Sequence oligoRC("reverse", oligo);
1432                                         //oligoRC.reverseComplement();
1433                     string oligoRC = reverseOligo(oligo);
1434                                         revPrimer.push_back(oligoRC);
1435                                 }
1436                                 else if(type == "BARCODE"){
1437                                         inOligos >> group;
1438                     
1439                     //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
1440                     //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
1441                                         string temp = "";
1442                     while (!inOligos.eof())     {       
1443                                                 char c = inOligos.get(); 
1444                                                 if (c == 10 || c == 13){        break;  }
1445                                                 else if (c == 32 || c == 9){;} //space or tab
1446                                                 else {  temp += c;  }
1447                                         } 
1448                                         
1449                     //then this is illumina data with 4 columns
1450                     if (temp != "") {  
1451                         
1452                         for(int i=0;i<group.length();i++){
1453                             group[i] = toupper(group[i]);
1454                             if(group[i] == 'U') {       group[i] = 'T'; }
1455                         }
1456                         
1457                         if (m->debug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); }
1458                         
1459                         string reverseBarcode = reverseOligo(group); //reverse barcode
1460                         //check for repeat barcodes
1461                         map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
1462                         if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine();  }
1463                         
1464                         group = temp;
1465                         rbarcodes[reverseBarcode]=indexBarcode; 
1466                     }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } }
1467                         
1468                                         //check for repeat barcodes
1469                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1470                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1471                                                 
1472                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1473                                         barcodeNameVector.push_back(group);
1474                                 }else if(type == "LINKER"){
1475                                         linker.push_back(oligo);
1476                                 }else if(type == "SPACER"){
1477                                         spacer.push_back(oligo);
1478                                 }
1479                                 else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
1480                         }
1481                         m->gobble(inOligos);
1482                 }       
1483                 inOligos.close();
1484                 
1485                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1486                 
1487                 //add in potential combos
1488                 if(barcodeNameVector.size() == 0){
1489                         barcodes[""] = 0;
1490                         barcodeNameVector.push_back("");                        
1491                 }
1492                 
1493                 if(primerNameVector.size() == 0){
1494                         primers[""] = 0;
1495                         primerNameVector.push_back("");                 
1496                 }
1497                 
1498                 fastaFileNames.resize(barcodeNameVector.size());
1499                 for(int i=0;i<fastaFileNames.size();i++){
1500                         fastaFileNames[i].assign(primerNameVector.size(), "");
1501                 }
1502                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1503                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1504                 
1505                 if(allFiles){
1506                         set<string> uniqueNames; //used to cleanup outputFileNames
1507                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1508                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1509                                         
1510                                         string primerName = primerNameVector[itPrimer->second];
1511                                         string barcodeName = barcodeNameVector[itBar->second];
1512                                         
1513                                         string comboGroupName = "";
1514                                         string fastaFileName = "";
1515                                         string qualFileName = "";
1516                                         string nameFileName = "";
1517                     string countFileName = "";
1518                                         
1519                                         if(primerName == ""){
1520                                                 comboGroupName = barcodeNameVector[itBar->second];
1521                                         }
1522                                         else{
1523                                                 if(barcodeName == ""){
1524                                                         comboGroupName = primerNameVector[itPrimer->second];
1525                                                 }
1526                                                 else{
1527                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1528                                                 }
1529                                         }
1530                                         
1531                                         
1532                                         ofstream temp;
1533                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1534                                         if (uniqueNames.count(fastaFileName) == 0) {
1535                                                 outputNames.push_back(fastaFileName);
1536                                                 outputTypes["fasta"].push_back(fastaFileName);
1537                                                 uniqueNames.insert(fastaFileName);
1538                                         }
1539                                         
1540                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1541                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1542                                         
1543                                         if(qFileName != ""){
1544                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1545                                                 if (uniqueNames.count(qualFileName) == 0) {
1546                                                         outputNames.push_back(qualFileName);
1547                                                         outputTypes["qfile"].push_back(qualFileName);
1548                                                 }
1549                                                 
1550                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1551                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1552                                         }
1553                                         
1554                                         if(nameFile != ""){
1555                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1556                                                 if (uniqueNames.count(nameFileName) == 0) {
1557                                                         outputNames.push_back(nameFileName);
1558                                                         outputTypes["name"].push_back(nameFileName);
1559                                                 }
1560                                                 
1561                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1562                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1563                                         }
1564                                 }
1565                         }
1566                 }
1567                 numFPrimers = primers.size();
1568                 numRPrimers = revPrimer.size();
1569         numLinkers = linker.size();
1570         numSpacers = spacer.size();
1571                 
1572                 bool allBlank = true;
1573                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1574                         if (barcodeNameVector[i] != "") {
1575                                 allBlank = false;
1576                                 break;
1577                         }
1578                 }
1579                 for (int i = 0; i < primerNameVector.size(); i++) {
1580                         if (primerNameVector[i] != "") {
1581                                 allBlank = false;
1582                                 break;
1583                         }
1584                 }
1585
1586                 if (allBlank) {
1587                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1588                         allFiles = false;
1589                         return false;
1590                 }
1591                 
1592                 return true;
1593                 
1594         }
1595         catch(exception& e) {
1596                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1597                 exit(1);
1598         }
1599 }
1600 //***************************************************************************************************************
1601
1602 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1603         try {
1604                 bool success = 1;
1605                 if(qscores.getName() != ""){
1606                         qscores.trimQScores(-1, keepFirst);
1607                 }
1608                 sequence.trim(keepFirst);
1609                 return success;
1610         }
1611         catch(exception& e) {
1612                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1613                 exit(1);
1614         }
1615         
1616 }       
1617
1618 //***************************************************************************************************************
1619
1620 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1621         try {
1622                 bool success = 0;
1623                 
1624                 int length = sequence.getNumBases() - removeLast;
1625                 
1626                 if(length > 0){
1627                         if(qscores.getName() != ""){
1628                                 qscores.trimQScores(-1, length);
1629                         }
1630                         sequence.trim(length);
1631                         success = 1;
1632                 }
1633                 else{
1634                         success = 0;
1635                 }
1636
1637                 return success;
1638         }
1639         catch(exception& e) {
1640                 m->errorOut(e, "removeLastTrim", "countDiffs");
1641                 exit(1);
1642         }
1643         
1644 }       
1645
1646 //***************************************************************************************************************
1647
1648 bool TrimSeqsCommand::cullLength(Sequence& seq){
1649         try {
1650         
1651                 int length = seq.getNumBases();
1652                 bool success = 0;       //guilty until proven innocent
1653                 
1654                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1655                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1656                 else                                                                                            {       success = 0;    }
1657                 
1658                 return success;
1659         
1660         }
1661         catch(exception& e) {
1662                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1663                 exit(1);
1664         }
1665         
1666 }
1667
1668 //***************************************************************************************************************
1669
1670 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1671         try {
1672                 int longHomoP = seq.getLongHomoPolymer();
1673                 bool success = 0;       //guilty until proven innocent
1674                 
1675                 if(longHomoP <= maxHomoP){      success = 1;    }
1676                 else                                    {       success = 0;    }
1677                 
1678                 return success;
1679         }
1680         catch(exception& e) {
1681                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1682                 exit(1);
1683         }
1684         
1685 }
1686 //********************************************************************/
1687 string TrimSeqsCommand::reverseOligo(string oligo){
1688         try {
1689         string reverse = "";
1690         
1691         for(int i=oligo.length()-1;i>=0;i--){
1692             
1693             if(oligo[i] == 'A')         {       reverse += 'T'; }
1694             else if(oligo[i] == 'T'){   reverse += 'A'; }
1695             else if(oligo[i] == 'U'){   reverse += 'A'; }
1696             
1697             else if(oligo[i] == 'G'){   reverse += 'C'; }
1698             else if(oligo[i] == 'C'){   reverse += 'G'; }
1699             
1700             else if(oligo[i] == 'R'){   reverse += 'Y'; }
1701             else if(oligo[i] == 'Y'){   reverse += 'R'; }
1702             
1703             else if(oligo[i] == 'M'){   reverse += 'K'; }
1704             else if(oligo[i] == 'K'){   reverse += 'M'; }
1705             
1706             else if(oligo[i] == 'W'){   reverse += 'W'; }
1707             else if(oligo[i] == 'S'){   reverse += 'S'; }
1708             
1709             else if(oligo[i] == 'B'){   reverse += 'V'; }
1710             else if(oligo[i] == 'V'){   reverse += 'B'; }
1711             
1712             else if(oligo[i] == 'D'){   reverse += 'H'; }
1713             else if(oligo[i] == 'H'){   reverse += 'D'; }
1714             
1715             else                                                {       reverse += 'N'; }
1716         }
1717         
1718         
1719         return reverse;
1720     }
1721         catch(exception& e) {
1722                 m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
1723                 exit(1);
1724         }
1725 }
1726
1727 //***************************************************************************************************************
1728
1729 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1730         try {
1731                 int numNs = seq.getAmbigBases();
1732                 bool success = 0;       //guilty until proven innocent
1733                 
1734                 if(numNs <= maxAmbig)   {       success = 1;    }
1735                 else                                    {       success = 0;    }
1736                 
1737                 return success;
1738         }
1739         catch(exception& e) {
1740                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1741                 exit(1);
1742         }
1743         
1744 }
1745 //***************************************************************************************************************