]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added linker and spacer to trim.flows
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
29                 CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
30         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
31                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
32                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
33                 CommandParameter pkeepforward("keepforward", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pkeepforward);
34                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
35                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
36                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
37                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
38                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
39                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
40                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
41                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
42                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
43                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
44                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
45                         
46                 vector<string> myArray;
47                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
48                 return myArray;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string TrimSeqsCommand::getHelpString(){        
57         try {
58                 string helpString = "";
59                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
60                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
61                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
62                 helpString += "The fasta parameter is required.\n";
63                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
64                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
65                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
66                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
67                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
68                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
69                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
70                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
71                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
72                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
73         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
74                 helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
75                 helpString += "The qfile parameter allows you to provide a quality file.\n";
76                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
77                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
78                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
79                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
80                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
81                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
82                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
83                 helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
84                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
85                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
86                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
87                 helpString += "The trim.seqs command should be in the following format: \n";
88                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
89                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
90                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
91                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
92                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
93                 return helpString;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
97                 exit(1);
98         }
99 }
100
101
102 //**********************************************************************************************************************
103
104 TrimSeqsCommand::TrimSeqsCommand(){     
105         try {
106                 abort = true; calledHelp = true; 
107                 setParameters();
108                 vector<string> tempOutNames;
109                 outputTypes["fasta"] = tempOutNames;
110                 outputTypes["qfile"] = tempOutNames;
111                 outputTypes["group"] = tempOutNames;
112                 outputTypes["name"] = tempOutNames;
113         }
114         catch(exception& e) {
115                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
116                 exit(1);
117         }
118 }
119 //***************************************************************************************************************
120
121 TrimSeqsCommand::TrimSeqsCommand(string option)  {
122         try {
123                 
124                 abort = false; calledHelp = false;   
125                 comboStarts = 0;
126                 
127                 //allow user to run help
128                 if(option == "help") { help(); abort = true; calledHelp = true; }
129                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
130                 
131                 else {
132                         vector<string> myArray = setParameters();
133                         
134                         OptionParser parser(option);
135                         map<string,string> parameters = parser.getParameters();
136                         
137                         ValidParameters validParameter;
138                         map<string,string>::iterator it;
139                         
140                         //check to make sure all parameters are valid for command
141                         for (it = parameters.begin(); it != parameters.end(); it++) { 
142                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
143                         }
144                         
145                         //initialize outputTypes
146                         vector<string> tempOutNames;
147                         outputTypes["fasta"] = tempOutNames;
148                         outputTypes["qfile"] = tempOutNames;
149                         outputTypes["group"] = tempOutNames;
150                         outputTypes["name"] = tempOutNames;
151                         
152                         //if the user changes the input directory command factory will send this info to us in the output parameter 
153                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
154                         if (inputDir == "not found"){   inputDir = "";          }
155                         else {
156                                 string path;
157                                 it = parameters.find("fasta");
158                                 //user has given a template file
159                                 if(it != parameters.end()){ 
160                                         path = m->hasPath(it->second);
161                                         //if the user has not given a path then, add inputdir. else leave path alone.
162                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
163                                 }
164                                 
165                                 it = parameters.find("oligos");
166                                 //user has given a template file
167                                 if(it != parameters.end()){ 
168                                         path = m->hasPath(it->second);
169                                         //if the user has not given a path then, add inputdir. else leave path alone.
170                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
171                                 }
172                                 
173                                 it = parameters.find("qfile");
174                                 //user has given a template file
175                                 if(it != parameters.end()){ 
176                                         path = m->hasPath(it->second);
177                                         //if the user has not given a path then, add inputdir. else leave path alone.
178                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
179                                 }
180                                 
181                                 it = parameters.find("name");
182                                 //user has given a template file
183                                 if(it != parameters.end()){ 
184                                         path = m->hasPath(it->second);
185                                         //if the user has not given a path then, add inputdir. else leave path alone.
186                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
187                                 }
188                                 
189                         }
190
191                         
192                         //check for required parameters
193                         fastaFile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastaFile == "not found") {                                 
195                                 fastaFile = m->getFastaFile(); 
196                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
197                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
198                         }else if (fastaFile == "not open") { abort = true; }    
199                         else { m->setFastaFile(fastaFile); }
200                         
201                         //if the user changes the output directory command factory will send this info to us in the output parameter 
202                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
203                                 outputDir = ""; 
204                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
205                         }
206                 
207                         
208                         //check for optional parameter and set defaults
209                         // ...at some point should added some additional type checking...
210                         string temp;
211                         temp = validParameter.validFile(parameters, "flip", false);
212                         if (temp == "not found")    {   flip = 0;       }
213                         else {  flip = m->isTrue(temp);         }
214                 
215                         temp = validParameter.validFile(parameters, "oligos", true);
216                         if (temp == "not found"){       oligoFile = "";         }
217                         else if(temp == "not open"){    abort = true;   } 
218                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
219                         
220                         
221                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
222                         m->mothurConvert(temp, maxAmbig);  
223
224                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
225                         m->mothurConvert(temp, maxHomoP);  
226
227                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
228                         m->mothurConvert(temp, minLength); 
229                         
230                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
231                         m->mothurConvert(temp, maxLength);
232                         
233                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
234                         m->mothurConvert(temp, bdiffs);
235                         
236                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
237                         m->mothurConvert(temp, pdiffs);
238             
239             temp = validParameter.validFile(parameters, "ldiffs", false);               if (temp == "not found") { temp = "0"; }
240                         m->mothurConvert(temp, ldiffs);
241             
242             temp = validParameter.validFile(parameters, "sdiffs", false);               if (temp == "not found") { temp = "0"; }
243                         m->mothurConvert(temp, sdiffs);
244                         
245                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
246                         m->mothurConvert(temp, tdiffs);
247                         
248                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
249                         
250                         temp = validParameter.validFile(parameters, "qfile", true);     
251                         if (temp == "not found")        {       qFileName = "";         }
252                         else if(temp == "not open")     {       abort = true;           }
253                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
254                         
255                         temp = validParameter.validFile(parameters, "name", true);      
256                         if (temp == "not found")        {       nameFile = "";          }
257                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
258                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
259                         
260                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
261                         m->mothurConvert(temp, qThreshold);
262                         
263                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
264                         qtrim = m->isTrue(temp);
265
266                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
267                         convert(temp, qRollAverage);
268
269                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
270                         convert(temp, qWindowAverage);
271
272                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
273                         convert(temp, qWindowSize);
274
275                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
276                         convert(temp, qWindowStep);
277
278                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
279                         convert(temp, qAverage);
280
281                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
282                         convert(temp, keepFirst);
283
284                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
285                         convert(temp, removeLast);
286                         
287                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
288                         allFiles = m->isTrue(temp);
289             
290             temp = validParameter.validFile(parameters, "keepforward", false);          if (temp == "not found") { temp = "F"; }
291                         keepforward = m->isTrue(temp);
292                         
293                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
294                         m->setProcessors(temp);
295                         m->mothurConvert(temp, processors); 
296                         
297                         
298                         if(allFiles && (oligoFile == "")){
299                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
300                         }
301                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
302                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
303                                 qAverage=0;
304                                 qThreshold=0;
305                         }
306                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
307                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
308                                 abort = true;
309                         }
310                         
311                         if (nameFile == "") {
312                                 vector<string> files; files.push_back(fastaFile);
313                                 parser.getNameFile(files);
314                         }
315                 }
316
317         }
318         catch(exception& e) {
319                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
320                 exit(1);
321         }
322 }
323 //***************************************************************************************************************
324
325 int TrimSeqsCommand::execute(){
326         try{
327         
328                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
329                 
330                 numFPrimers = 0;  //this needs to be initialized
331                 numRPrimers = 0;
332                 createGroup = false;
333                 vector<vector<string> > fastaFileNames;
334                 vector<vector<string> > qualFileNames;
335                 vector<vector<string> > nameFileNames;
336                 
337                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
338                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
339                 
340                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
341                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
342                 
343                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
344                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
345                 
346                 if (qFileName != "") {
347                         outputNames.push_back(trimQualFile);
348                         outputNames.push_back(scrapQualFile);
349                         outputTypes["qfile"].push_back(trimQualFile);
350                         outputTypes["qfile"].push_back(scrapQualFile); 
351                 }
352                 
353                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
354                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
355                 
356                 if (nameFile != "") {
357                         m->readNames(nameFile, nameMap);
358                         outputNames.push_back(trimNameFile);
359                         outputNames.push_back(scrapNameFile);
360                         outputTypes["name"].push_back(trimNameFile);
361                         outputTypes["name"].push_back(scrapNameFile); 
362                 }
363                 
364                 if (m->control_pressed) { return 0; }
365                 
366                 string outputGroupFileName;
367                 if(oligoFile != ""){
368                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
369                         if (createGroup) {
370                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
371                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
372                         }
373                 }
374                 
375         //fills lines and qlines
376                 setLines(fastaFile, qFileName);
377                 
378         if(processors == 1){
379             driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
380         }else{
381             createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
382         }       
383                 
384                 
385                 if (m->control_pressed) {  return 0; }                  
386         
387                 if(allFiles){
388                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
389                         map<string, string>::iterator it;
390                         set<string> namesToRemove;
391                         for(int i=0;i<fastaFileNames.size();i++){
392                                 for(int j=0;j<fastaFileNames[0].size();j++){
393                                         if (fastaFileNames[i][j] != "") {
394                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
395                                                         if(m->isBlank(fastaFileNames[i][j])){
396                                                                 m->mothurRemove(fastaFileNames[i][j]);
397                                                                 namesToRemove.insert(fastaFileNames[i][j]);
398                                                         
399                                                                 if(qFileName != ""){
400                                                                         m->mothurRemove(qualFileNames[i][j]);
401                                                                         namesToRemove.insert(qualFileNames[i][j]);
402                                                                 }
403                                                                 
404                                                                 if(nameFile != ""){
405                                                                         m->mothurRemove(nameFileNames[i][j]);
406                                                                         namesToRemove.insert(nameFileNames[i][j]);
407                                                                 }
408                                                         }else{  
409                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
410                                                                 if (it == uniqueFastaNames.end()) {     
411                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
412                                                                 }       
413                                                         }
414                                                 }
415                                         }
416                                 }
417                         }
418                         
419                         //remove names for outputFileNames, just cleans up the output
420                         vector<string> outputNames2;
421                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
422                         outputNames = outputNames2;
423                         
424                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
425                                 ifstream in;
426                                 m->openInputFile(it->first, in);
427                                 
428                                 ofstream out;
429                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
430                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
431                                 m->openOutputFile(thisGroupName, out);
432                                 
433                                 while (!in.eof()){
434                                         if (m->control_pressed) { break; }
435                                         
436                                         Sequence currSeq(in); m->gobble(in);
437                                         out << currSeq.getName() << '\t' << it->second << endl;
438                                 }
439                                 in.close();
440                                 out.close();
441                         }
442                 }
443                 
444                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
445
446                 //output group counts
447                 m->mothurOutEndLine();
448                 int total = 0;
449                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
450                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
451                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
452                 }
453                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
454                 
455                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
456
457                 //set fasta file as new current fastafile
458                 string current = "";
459                 itTypes = outputTypes.find("fasta");
460                 if (itTypes != outputTypes.end()) {
461                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
462                 }
463                 
464                 itTypes = outputTypes.find("name");
465                 if (itTypes != outputTypes.end()) {
466                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
467                 }
468                 
469                 itTypes = outputTypes.find("qfile");
470                 if (itTypes != outputTypes.end()) {
471                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
472                 }
473                 
474                 itTypes = outputTypes.find("group");
475                 if (itTypes != outputTypes.end()) {
476                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
477                 }
478
479                 m->mothurOutEndLine();
480                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
481                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
482                 m->mothurOutEndLine();
483                 
484                 return 0;       
485                         
486         }
487         catch(exception& e) {
488                 m->errorOut(e, "TrimSeqsCommand", "execute");
489                 exit(1);
490         }
491 }
492                 
493 /**************************************************************************************/
494
495 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair line, linePair qline) {      
496                 
497         try {
498                 
499                 ofstream trimFASTAFile;
500                 m->openOutputFile(trimFileName, trimFASTAFile);
501                 
502                 ofstream scrapFASTAFile;
503                 m->openOutputFile(scrapFileName, scrapFASTAFile);
504                 
505                 ofstream trimQualFile;
506                 ofstream scrapQualFile;
507                 if(qFileName != ""){
508                         m->openOutputFile(trimQFileName, trimQualFile);
509                         m->openOutputFile(scrapQFileName, scrapQualFile);
510                 }
511                 
512                 ofstream trimNameFile;
513                 ofstream scrapNameFile;
514                 if(nameFile != ""){
515                         m->openOutputFile(trimNFileName, trimNameFile);
516                         m->openOutputFile(scrapNFileName, scrapNameFile);
517                 }
518                 
519                 
520                 ofstream outGroupsFile;
521                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
522                 if(allFiles){
523                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
524                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
525                                         if (fastaFileNames[i][j] != "") {
526                                                 ofstream temp;
527                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
528                                                 if(qFileName != ""){
529                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
530                                                 }
531                                                 
532                                                 if(nameFile != ""){
533                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
534                                                 }
535                                         }
536                                 }
537                         }
538                 }
539                 
540                 ifstream inFASTA;
541                 m->openInputFile(filename, inFASTA);
542                 inFASTA.seekg(line.start);
543                 
544                 ifstream qFile;
545                 if(qFileName != "")     {
546                         m->openInputFile(qFileName, qFile);
547                         qFile.seekg(qline.start);  
548                 }
549                 
550                 int count = 0;
551                 bool moreSeqs = 1;
552                 TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
553         
554                 while (moreSeqs) {
555                                 
556                         if (m->control_pressed) { 
557                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
558                                 if (createGroup) {       outGroupsFile.close();   }
559
560                                 if(qFileName != ""){
561                                         qFile.close();
562                                 }
563                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
564
565                                 return 0;
566                         }
567                         
568                         int success = 1;
569                         string trashCode = "";
570                         int currentSeqsDiffs = 0;
571
572                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
573                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
574                         QualityScores currQual;
575                         if(qFileName != ""){
576                                 currQual = QualityScores(qFile);  m->gobble(qFile);
577                         }
578                         
579                         string origSeq = currSeq.getUnaligned();
580                         if (origSeq != "") {
581                                 
582                                 int barcodeIndex = 0;
583                                 int primerIndex = 0;
584                                 
585                 if(numLinkers != 0){
586                                         success = trimOligos.stripLinker(currSeq, currQual);
587                                         if(success > ldiffs)            {       trashCode += 'k';       }
588                                         else{ currentSeqsDiffs += success;  }
589
590                                 }
591                 
592                                 if(barcodes.size() != 0){
593                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
594                                         if(success > bdiffs)            {       trashCode += 'b';       }
595                                         else{ currentSeqsDiffs += success;  }
596                                 }
597                                 
598                 if(numSpacers != 0){
599                                         success = trimOligos.stripSpacer(currSeq, currQual);
600                                         if(success > sdiffs)            {       trashCode += 's';       }
601                                         else{ currentSeqsDiffs += success;  }
602
603                                 }
604                 
605                                 if(numFPrimers != 0){
606                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
607                                         if(success > pdiffs)            {       trashCode += 'f';       }
608                                         else{ currentSeqsDiffs += success;  }
609                                 }
610                                 
611                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
612                                 
613                                 if(numRPrimers != 0){
614                                         success = trimOligos.stripReverse(currSeq, currQual);
615                                         if(!success)                            {       trashCode += 'r';       }
616                                 }
617
618                                 if(keepFirst != 0){
619                                         success = keepFirstTrim(currSeq, currQual);
620                                 }
621                                 
622                                 if(removeLast != 0){
623                                         success = removeLastTrim(currSeq, currQual);
624                                         if(!success)                            {       trashCode += 'l';       }
625                                 }
626
627                                 
628                                 if(qFileName != ""){
629                                         int origLength = currSeq.getNumBases();
630                                         
631                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
632                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
633                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
634                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
635                                         else                                            {       success = 1;                            }
636                                         
637                                         //you don't want to trim, if it fails above then scrap it
638                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
639                                         
640                                         if(!success)                            {       trashCode += 'q';       }
641                                 }                               
642                 
643                                 if(minLength > 0 || maxLength > 0){
644                                         success = cullLength(currSeq);
645                                         if(!success)                            {       trashCode += 'l';       }
646                                 }
647                                 if(maxHomoP > 0){
648                                         success = cullHomoP(currSeq);
649                                         if(!success)                            {       trashCode += 'h';       }
650                                 }
651                                 if(maxAmbig != -1){
652                                         success = cullAmbigs(currSeq);
653                                         if(!success)                            {       trashCode += 'n';       }
654                                 }
655                                 
656                                 if(flip){               // should go last                       
657                                         currSeq.reverseComplement();
658                                         if(qFileName != ""){
659                                                 currQual.flipQScores(); 
660                                         }
661                                 }
662                                 
663                                 if(trashCode.length() == 0){
664                                         currSeq.setAligned(currSeq.getUnaligned());
665                                         currSeq.printSequence(trimFASTAFile);
666                                         
667                                         if(qFileName != ""){
668                                                 currQual.printQScores(trimQualFile);
669                                         }
670                                         
671                                         if(nameFile != ""){
672                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
673                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
674                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
675                                         }
676                                         
677                                         if (createGroup) {
678                                                 if(barcodes.size() != 0){
679                                                         string thisGroup = barcodeNameVector[barcodeIndex];
680                                                         if (primers.size() != 0) { 
681                                                                 if (primerNameVector[primerIndex] != "") { 
682                                                                         if(thisGroup != "") {
683                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
684                                                                         }else {
685                                                                                 thisGroup = primerNameVector[primerIndex]; 
686                                                                         }
687                                                                 } 
688                                                         }
689                                                         
690                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
691                                                         
692                                                         if (nameFile != "") {
693                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
694                                                                 if (itName != nameMap.end()) { 
695                                                                         vector<string> thisSeqsNames; 
696                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
697                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
698                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
699                                                                         }
700                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
701                                                         }
702                                                         
703                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
704                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
705                                                         else { groupCounts[it->first]++; }
706                                                                 
707                                                 }
708                                         }
709                                         
710                                         if(allFiles){
711                                                 ofstream output;
712                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
713                                                 currSeq.printSequence(output);
714                                                 output.close();
715                                                 
716                                                 if(qFileName != ""){
717                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
718                                                         currQual.printQScores(output);
719                                                         output.close();                                                 
720                                                 }
721                                                 
722                                                 if(nameFile != ""){
723                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
724                                                         if (itName != nameMap.end()) { 
725                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
726                                                                 output << itName->first << '\t' << itName->second << endl; 
727                                                                 output.close();
728                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
729                                                 }
730                                         }
731                                 }
732                                 else{
733                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
734                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
735                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
736                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
737                                         }
738                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
739                                         currSeq.setUnaligned(origSeq);
740                                         currSeq.setAligned(origSeq);
741                                         currSeq.printSequence(scrapFASTAFile);
742                                         if(qFileName != ""){
743                                                 currQual.printQScores(scrapQualFile);
744                                         }
745                                 }
746                                 count++;
747                         }
748                         
749                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
750                                 unsigned long long pos = inFASTA.tellg();
751                                 if ((pos == -1) || (pos >= line.end)) { break; }
752                         
753                         #else
754                                 if (inFASTA.eof()) { break; }
755                         #endif
756                         
757                         //report progress
758                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
759                         
760                 }
761                 //report progress
762                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
763                 
764                 
765                 inFASTA.close();
766                 trimFASTAFile.close();
767                 scrapFASTAFile.close();
768                 if (createGroup) {       outGroupsFile.close();   }
769                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
770                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
771                 
772                 return count;
773         }
774         catch(exception& e) {
775                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
776                 exit(1);
777         }
778 }
779
780 /**************************************************************************************************/
781
782 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
783         try {
784         
785         int process = 1;
786                 int exitCommand = 1;
787                 processIDS.clear();
788                 
789 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
790                                 //loop through and create all the processes you want
791                 while (process != processors) {
792                         int pid = fork();
793                         
794                         if (pid > 0) {
795                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
796                                 process++;
797                         }else if (pid == 0){
798                                 
799                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
800                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
801                                 vector<vector<string> > tempNameFileNames = nameFileNames;
802
803                                 if(allFiles){
804                                         ofstream temp;
805
806                                         for(int i=0;i<tempFASTAFileNames.size();i++){
807                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
808                                                         if (tempFASTAFileNames[i][j] != "") {
809                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
810                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
811
812                                                                 if(qFileName != ""){
813                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
814                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
815                                                                 }
816                                                                 if(nameFile != ""){
817                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
818                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
819                                                                 }
820                                                         }
821                                                 }
822                                         }
823                                 }
824                                                         
825                                 driverCreateTrim(filename,
826                                                                  qFileName,
827                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
828                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
829                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
830                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
831                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
832                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
833                                                                  (groupFile + toString(getpid()) + ".temp"),
834                                                                  tempFASTAFileNames,
835                                                                  tempPrimerQualFileNames,
836                                                                  tempNameFileNames,
837                                                                  lines[process],
838                                                                  qLines[process]);
839                                 
840                                 //pass groupCounts to parent
841                                 if(createGroup){
842                                         ofstream out;
843                                         string tempFile = filename + toString(getpid()) + ".num.temp";
844                                         m->openOutputFile(tempFile, out);
845                                         
846                                         out << groupCounts.size() << endl;
847                                         
848                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
849                                                 out << it->first << '\t' << it->second << endl;
850                                         }
851                                         out.close();
852                                 }
853                                 exit(0);
854                         }else { 
855                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
856                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
857                                 exit(0);
858                         }
859                 }
860                 
861                 //parent do my part
862                 ofstream temp;
863                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
864                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
865                 if(qFileName != ""){
866                         m->openOutputFile(trimQualFileName, temp);              temp.close();
867                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
868                 }
869                 if (nameFile != "") {
870                         m->openOutputFile(trimNameFileName, temp);              temp.close();
871                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
872                 }
873
874                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
875                 
876                 //force parent to wait until all the processes are done
877                 for (int i=0;i<processIDS.size();i++) { 
878                         int temp = processIDS[i];
879                         wait(&temp);
880                 }
881 #else
882         //////////////////////////////////////////////////////////////////////////////////////////////////////
883                 //Windows version shared memory, so be careful when passing variables through the trimData struct. 
884                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
885                 //////////////////////////////////////////////////////////////////////////////////////////////////////
886                 
887                 vector<trimData*> pDataArray; 
888                 DWORD   dwThreadIdArray[processors-1];
889                 HANDLE  hThreadArray[processors-1]; 
890                 
891                 //Create processor worker threads.
892                 for( int i=0; i<processors-1; i++){
893                         
894             string extension = "";
895                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
896             vector<vector<string> > tempFASTAFileNames = fastaFileNames;
897             vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
898             vector<vector<string> > tempNameFileNames = nameFileNames;
899             
900             if(allFiles){
901                 ofstream temp;
902                 
903                 for(int i=0;i<tempFASTAFileNames.size();i++){
904                     for(int j=0;j<tempFASTAFileNames[i].size();j++){
905                         if (tempFASTAFileNames[i][j] != "") {
906                             tempFASTAFileNames[i][j] += extension;
907                             m->openOutputFile(tempFASTAFileNames[i][j], temp);                  temp.close();
908                             
909                             if(qFileName != ""){
910                                 tempPrimerQualFileNames[i][j] += extension;
911                                 m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
912                             }
913                             if(nameFile != ""){
914                                 tempNameFileNames[i][j] += extension;
915                                 m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
916                             }
917                         }
918                     }
919                 }
920             }
921
922             
923                         trimData* tempTrim = new trimData(filename,
924                                               qFileName, nameFile,
925                                               (trimFASTAFileName+extension),
926                                               (scrapFASTAFileName+extension),
927                                               (trimQualFileName+extension),
928                                               (scrapQualFileName+extension),
929                                               (trimNameFileName+extension),
930                                               (scrapNameFileName+extension),
931                                               (groupFile+extension),
932                                               tempFASTAFileNames,
933                                               tempPrimerQualFileNames,
934                                               tempNameFileNames,
935                                               lines[i].start, lines[i].end, qLines[i].start, qLines[i].end, m,
936                                               pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
937                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
938                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
939                                              minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap);
940                         pDataArray.push_back(tempTrim);
941             
942                         hThreadArray[i] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
943                 }
944         
945         //parent do my part
946                 ofstream temp;
947                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
948                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
949                 if(qFileName != ""){
950                         m->openOutputFile(trimQualFileName, temp);              temp.close();
951                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
952                 }
953                 if (nameFile != "") {
954                         m->openOutputFile(trimNameFileName, temp);              temp.close();
955                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
956                 }
957         
958                 driverCreateTrim(filename, qFileName, (trimFASTAFileName + toString(processors-1) + ".temp"), (scrapFASTAFileName + toString(processors-1) + ".temp"), (trimQualFileName + toString(processors-1) + ".temp"), (scrapQualFileName + toString(processors-1) + ".temp"), (trimNameFileName + toString(processors-1) + ".temp"), (scrapNameFileName + toString(processors-1) + ".temp"), (groupFile + toString(processors-1) + ".temp"), fastaFileNames, qualFileNames, nameFileNames, lines[processors-1], qLines[processors-1]);
959         processIDS.push_back(processors-1);
960
961         
962                 //Wait until all threads have terminated.
963                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
964                 
965                 //Close all thread handles and free memory allocations.
966                 for(int i=0; i < pDataArray.size(); i++){
967                         for (map<string, int>::iterator it = pDataArray[i]->groupCounts.begin(); it != pDataArray[i]->groupCounts.end(); it++) {
968                 map<string, int>::iterator it2 = groupCounts.find(it->first);
969                 if (it2 == groupCounts.end()) { groupCounts[it->first] = it->second; }
970                 else { groupCounts[it->first] += it->second; }
971             }
972             CloseHandle(hThreadArray[i]);
973                         delete pDataArray[i];
974                 }
975         
976 #endif          
977         
978         
979         //append files
980                 for(int i=0;i<processIDS.size();i++){
981                         
982                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
983                         
984                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
985                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
986                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
987                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
988                         
989                         if(qFileName != ""){
990                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
991                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
992                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
993                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
994                         }
995                         
996                         if(nameFile != ""){
997                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
998                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
999                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
1000                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
1001                         }
1002                         
1003                         if(createGroup){
1004                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
1005                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
1006                         }
1007                         
1008                         
1009                         if(allFiles){
1010                                 for(int j=0;j<fastaFileNames.size();j++){
1011                                         for(int k=0;k<fastaFileNames[j].size();k++){
1012                                                 if (fastaFileNames[j][k] != "") {
1013                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
1014                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1015                                                         
1016                                                         if(qFileName != ""){
1017                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
1018                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1019                                                         }
1020                                                         
1021                                                         if(nameFile != ""){
1022                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
1023                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
1024                                                         }
1025                                                 }
1026                                         }
1027                                 }
1028                         }
1029                         
1030             #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1031                         if(createGroup){
1032                                 ifstream in;
1033                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1034                                 m->openInputFile(tempFile, in);
1035                                 int tempNum;
1036                                 string group;
1037                                 
1038                                 in >> tempNum; m->gobble(in);
1039                                 
1040                                 if (tempNum != 0) {
1041                                         while (!in.eof()) { 
1042                                                 in >> group >> tempNum; m->gobble(in);
1043                         
1044                                                 map<string, int>::iterator it = groupCounts.find(group);
1045                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
1046                                                 else { groupCounts[it->first] += tempNum; }
1047                                         }
1048                                 }
1049                                 in.close(); m->mothurRemove(tempFile);
1050                         }
1051             #endif
1052                 }
1053
1054         return exitCommand;
1055         }
1056         catch(exception& e) {
1057                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
1058                 exit(1);
1059         }
1060 }
1061
1062 /**************************************************************************************************/
1063
1064 int TrimSeqsCommand::setLines(string filename, string qfilename) {
1065         try {
1066         
1067         vector<unsigned long long> fastaFilePos;
1068                 vector<unsigned long long> qfileFilePos;
1069                 
1070                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1071                 //set file positions for fasta file
1072                 fastaFilePos = m->divideFile(filename, processors);
1073                 
1074                 if (qfilename == "") { return processors; }
1075                 
1076                 //get name of first sequence in each chunk
1077                 map<string, int> firstSeqNames;
1078                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1079                         ifstream in;
1080                         m->openInputFile(filename, in);
1081                         in.seekg(fastaFilePos[i]);
1082                 
1083                         Sequence temp(in); 
1084                         firstSeqNames[temp.getName()] = i;
1085                 
1086                         in.close();
1087                 }
1088                                 
1089                 //seach for filePos of each first name in the qfile and save in qfileFilePos
1090                 ifstream inQual;
1091                 m->openInputFile(qfilename, inQual);
1092                 
1093                 string input;
1094                 while(!inQual.eof()){   
1095                         input = m->getline(inQual);
1096
1097                         if (input.length() != 0) {
1098                                 if(input[0] == '>'){ //this is a sequence name line
1099                                         istringstream nameStream(input);
1100                                         
1101                                         string sname = "";  nameStream >> sname;
1102                                         sname = sname.substr(1);
1103                                         
1104                                         map<string, int>::iterator it = firstSeqNames.find(sname);
1105                                         
1106                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
1107                                                 unsigned long long pos = inQual.tellg(); 
1108                                                 qfileFilePos.push_back(pos - input.length() - 1);       
1109                                                 firstSeqNames.erase(it);
1110                                         }
1111                                 }
1112                         }
1113                         
1114                         if (firstSeqNames.size() == 0) { break; }
1115                 }
1116                 inQual.close();
1117                 
1118                 
1119                 if (firstSeqNames.size() != 0) { 
1120                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
1121                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
1122                         }
1123                         qFileName = "";
1124                         return processors;
1125                 }
1126
1127                 //get last file position of qfile
1128                 FILE * pFile;
1129                 unsigned long long size;
1130                 
1131                 //get num bytes in file
1132                 pFile = fopen (qfilename.c_str(),"rb");
1133                 if (pFile==NULL) perror ("Error opening file");
1134                 else{
1135                         fseek (pFile, 0, SEEK_END);
1136                         size=ftell (pFile);
1137                         fclose (pFile);
1138                 }
1139                 
1140                 qfileFilePos.push_back(size);
1141         
1142         for (int i = 0; i < (fastaFilePos.size()-1); i++) {
1143                         lines.push_back(linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
1144                         if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[i], qfileFilePos[(i+1)]));  }
1145                 }       
1146                 if(qfilename == "")     {       qLines = lines; } //files with duds
1147                 
1148                 return processors;
1149                 
1150                 #else
1151             
1152         if (processors == 1) { //save time
1153                         //fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1154                         //fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1155             lines.push_back(linePair(0, 1000));
1156             if (qfilename != "") {  qLines.push_back(linePair(0, 1000)); }
1157         }else{
1158             int numFastaSeqs = 0;
1159             fastaFilePos = m->setFilePosFasta(filename, numFastaSeqs); 
1160         
1161             if (qfilename != "") { 
1162                 int numQualSeqs = 0;
1163                 qfileFilePos = m->setFilePosFasta(qfilename, numQualSeqs); 
1164                 
1165                 if (numFastaSeqs != numQualSeqs) {
1166                     m->mothurOut("[ERROR]: You have " + toString(numFastaSeqs) + " sequences in your fasta file, but " + toString(numQualSeqs) + " sequences in your quality file."); m->mothurOutEndLine(); m->control_pressed = true; 
1167                 }
1168             }
1169         
1170             //figure out how many sequences you have to process
1171             int numSeqsPerProcessor = numFastaSeqs / processors;
1172             for (int i = 0; i < processors; i++) {
1173                 int startIndex =  i * numSeqsPerProcessor;
1174                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
1175                 lines.push_back(linePair(fastaFilePos[startIndex], numSeqsPerProcessor));
1176                 cout << fastaFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
1177                 if (qfilename != "") {  qLines.push_back(linePair(qfileFilePos[startIndex], numSeqsPerProcessor)); }
1178             }
1179         
1180             if(qfilename == "") {       qLines = lines; } //files with duds
1181         }
1182                         return 1;
1183                 
1184                 #endif
1185         }
1186         catch(exception& e) {
1187                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1188                 exit(1);
1189         }
1190 }
1191
1192 //***************************************************************************************************************
1193
1194 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1195         try {
1196                 ifstream inOligos;
1197                 m->openInputFile(oligoFile, inOligos);
1198                 
1199                 ofstream test;
1200                 
1201                 string type, oligo, group;
1202
1203                 int indexPrimer = 0;
1204                 int indexBarcode = 0;
1205                 
1206                 while(!inOligos.eof()){
1207
1208                         inOligos >> type; 
1209                                         
1210                         if(type[0] == '#'){
1211                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1212                                 m->gobble(inOligos);
1213                         }
1214                         else{
1215                                 m->gobble(inOligos);
1216                                 //make type case insensitive
1217                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1218                                 
1219                                 inOligos >> oligo;
1220                                 
1221                                 for(int i=0;i<oligo.length();i++){
1222                                         oligo[i] = toupper(oligo[i]);
1223                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1224                                 }
1225                                 
1226                                 if(type == "FORWARD"){
1227                                         group = "";
1228                                         
1229                                         // get rest of line in case there is a primer name
1230                                         while (!inOligos.eof()) {       
1231                                                 char c = inOligos.get(); 
1232                                                 if (c == 10 || c == 13){        break;  }
1233                                                 else if (c == 32 || c == 9){;} //space or tab
1234                                                 else {  group += c;  }
1235                                         } 
1236                                         
1237                                         //check for repeat barcodes
1238                                         map<string, int>::iterator itPrime = primers.find(oligo);
1239                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1240                                         
1241                                         primers[oligo]=indexPrimer; indexPrimer++;              
1242                                         primerNameVector.push_back(group);
1243                                 }
1244                                 else if(type == "REVERSE"){
1245                                         Sequence oligoRC("reverse", oligo);
1246                                         oligoRC.reverseComplement();
1247                                         revPrimer.push_back(oligoRC.getUnaligned());
1248                                 }
1249                                 else if(type == "BARCODE"){
1250                                         inOligos >> group;
1251                                         
1252                                         //check for repeat barcodes
1253                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1254                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1255                                                 
1256                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1257                                         barcodeNameVector.push_back(group);
1258                                 }else if(type == "LINKER"){
1259                                         linker.push_back(oligo);
1260                                 }else if(type == "SPACER"){
1261                                         spacer.push_back(oligo);
1262                                 }
1263                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1264                         }
1265                         m->gobble(inOligos);
1266                 }       
1267                 inOligos.close();
1268                 
1269                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1270                 
1271                 //add in potential combos
1272                 if(barcodeNameVector.size() == 0){
1273                         barcodes[""] = 0;
1274                         barcodeNameVector.push_back("");                        
1275                 }
1276                 
1277                 if(primerNameVector.size() == 0){
1278                         primers[""] = 0;
1279                         primerNameVector.push_back("");                 
1280                 }
1281                 
1282                 fastaFileNames.resize(barcodeNameVector.size());
1283                 for(int i=0;i<fastaFileNames.size();i++){
1284                         fastaFileNames[i].assign(primerNameVector.size(), "");
1285                 }
1286                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1287                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1288                 
1289                 if(allFiles){
1290                         set<string> uniqueNames; //used to cleanup outputFileNames
1291                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1292                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1293                                         
1294                                         string primerName = primerNameVector[itPrimer->second];
1295                                         string barcodeName = barcodeNameVector[itBar->second];
1296                                         
1297                                         string comboGroupName = "";
1298                                         string fastaFileName = "";
1299                                         string qualFileName = "";
1300                                         string nameFileName = "";
1301                                         
1302                                         if(primerName == ""){
1303                                                 comboGroupName = barcodeNameVector[itBar->second];
1304                                         }
1305                                         else{
1306                                                 if(barcodeName == ""){
1307                                                         comboGroupName = primerNameVector[itPrimer->second];
1308                                                 }
1309                                                 else{
1310                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1311                                                 }
1312                                         }
1313                                         
1314                                         
1315                                         ofstream temp;
1316                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1317                                         if (uniqueNames.count(fastaFileName) == 0) {
1318                                                 outputNames.push_back(fastaFileName);
1319                                                 outputTypes["fasta"].push_back(fastaFileName);
1320                                                 uniqueNames.insert(fastaFileName);
1321                                         }
1322                                         
1323                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1324                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1325                                         
1326                                         if(qFileName != ""){
1327                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1328                                                 if (uniqueNames.count(qualFileName) == 0) {
1329                                                         outputNames.push_back(qualFileName);
1330                                                         outputTypes["qfile"].push_back(qualFileName);
1331                                                 }
1332                                                 
1333                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1334                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1335                                         }
1336                                         
1337                                         if(nameFile != ""){
1338                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1339                                                 if (uniqueNames.count(nameFileName) == 0) {
1340                                                         outputNames.push_back(nameFileName);
1341                                                         outputTypes["name"].push_back(nameFileName);
1342                                                 }
1343                                                 
1344                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1345                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1346                                         }
1347                                         
1348                                 }
1349                         }
1350                 }
1351                 numFPrimers = primers.size();
1352                 numRPrimers = revPrimer.size();
1353         numLinkers = linker.size();
1354         numSpacers = spacer.size();
1355                 
1356                 bool allBlank = true;
1357                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1358                         if (barcodeNameVector[i] != "") {
1359                                 allBlank = false;
1360                                 break;
1361                         }
1362                 }
1363                 for (int i = 0; i < primerNameVector.size(); i++) {
1364                         if (primerNameVector[i] != "") {
1365                                 allBlank = false;
1366                                 break;
1367                         }
1368                 }
1369                 
1370                 if (allBlank) {
1371                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1372                         allFiles = false;
1373                         return false;
1374                 }
1375                 
1376                 return true;
1377                 
1378         }
1379         catch(exception& e) {
1380                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1381                 exit(1);
1382         }
1383 }
1384 //***************************************************************************************************************
1385
1386 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1387         try {
1388                 bool success = 1;
1389                 if(qscores.getName() != ""){
1390                         qscores.trimQScores(-1, keepFirst);
1391                 }
1392                 sequence.trim(keepFirst);
1393                 return success;
1394         }
1395         catch(exception& e) {
1396                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1397                 exit(1);
1398         }
1399         
1400 }       
1401
1402 //***************************************************************************************************************
1403
1404 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1405         try {
1406                 bool success = 0;
1407                 
1408                 int length = sequence.getNumBases() - removeLast;
1409                 
1410                 if(length > 0){
1411                         if(qscores.getName() != ""){
1412                                 qscores.trimQScores(-1, length);
1413                         }
1414                         sequence.trim(length);
1415                         success = 1;
1416                 }
1417                 else{
1418                         success = 0;
1419                 }
1420
1421                 return success;
1422         }
1423         catch(exception& e) {
1424                 m->errorOut(e, "removeLastTrim", "countDiffs");
1425                 exit(1);
1426         }
1427         
1428 }       
1429
1430 //***************************************************************************************************************
1431
1432 bool TrimSeqsCommand::cullLength(Sequence& seq){
1433         try {
1434         
1435                 int length = seq.getNumBases();
1436                 bool success = 0;       //guilty until proven innocent
1437                 
1438                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1439                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1440                 else                                                                                            {       success = 0;    }
1441                 
1442                 return success;
1443         
1444         }
1445         catch(exception& e) {
1446                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1447                 exit(1);
1448         }
1449         
1450 }
1451
1452 //***************************************************************************************************************
1453
1454 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1455         try {
1456                 int longHomoP = seq.getLongHomoPolymer();
1457                 bool success = 0;       //guilty until proven innocent
1458                 
1459                 if(longHomoP <= maxHomoP){      success = 1;    }
1460                 else                                    {       success = 0;    }
1461                 
1462                 return success;
1463         }
1464         catch(exception& e) {
1465                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1466                 exit(1);
1467         }
1468         
1469 }
1470
1471 //***************************************************************************************************************
1472
1473 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1474         try {
1475                 int numNs = seq.getAmbigBases();
1476                 bool success = 0;       //guilty until proven innocent
1477                 
1478                 if(numNs <= maxAmbig)   {       success = 1;    }
1479                 else                                    {       success = 0;    }
1480                 
1481                 return success;
1482         }
1483         catch(exception& e) {
1484                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1485                 exit(1);
1486         }
1487         
1488 }
1489 //***************************************************************************************************************