]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
added qvalues to metastats. fixed bug with chimera.uchime location. fixed windows...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12 #include "trimoligos.h"
13
14 //**********************************************************************************************************************
15 vector<string> TrimSeqsCommand::setParameters(){        
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
22                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
23                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
24                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
25                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
26                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
27                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
28                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
29                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
30                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
31                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
32                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
33                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
34                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
35                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
36                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
37                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
38                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
39                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
40                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
41                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
42                         
43                 vector<string> myArray;
44                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string TrimSeqsCommand::getHelpString(){        
54         try {
55                 string helpString = "";
56                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
57                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
58                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
59                 helpString += "The fasta parameter is required.\n";
60                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
61                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
62                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
63                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
64                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
65                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
66                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
67                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
68                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
69                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
70                 helpString += "The qfile parameter allows you to provide a quality file.\n";
71                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
72                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
73                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
74                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
75                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
76                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
77                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
78                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
79                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
80                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
81                 helpString += "The trim.seqs command should be in the following format: \n";
82                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
83                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
84                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
85                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
86                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
87                 return helpString;
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
91                 exit(1);
92         }
93 }
94
95
96 //**********************************************************************************************************************
97
98 TrimSeqsCommand::TrimSeqsCommand(){     
99         try {
100                 abort = true; calledHelp = true; 
101                 setParameters();
102                 vector<string> tempOutNames;
103                 outputTypes["fasta"] = tempOutNames;
104                 outputTypes["qfile"] = tempOutNames;
105                 outputTypes["group"] = tempOutNames;
106                 outputTypes["name"] = tempOutNames;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
110                 exit(1);
111         }
112 }
113 //***************************************************************************************************************
114
115 TrimSeqsCommand::TrimSeqsCommand(string option)  {
116         try {
117                 
118                 abort = false; calledHelp = false;   
119                 comboStarts = 0;
120                 
121                 //allow user to run help
122                 if(option == "help") { help(); abort = true; calledHelp = true; }
123                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
124                 
125                 else {
126                         vector<string> myArray = setParameters();
127                         
128                         OptionParser parser(option);
129                         map<string,string> parameters = parser.getParameters();
130                         
131                         ValidParameters validParameter;
132                         map<string,string>::iterator it;
133                         
134                         //check to make sure all parameters are valid for command
135                         for (it = parameters.begin(); it != parameters.end(); it++) { 
136                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
137                         }
138                         
139                         //initialize outputTypes
140                         vector<string> tempOutNames;
141                         outputTypes["fasta"] = tempOutNames;
142                         outputTypes["qfile"] = tempOutNames;
143                         outputTypes["group"] = tempOutNames;
144                         outputTypes["name"] = tempOutNames;
145                         
146                         //if the user changes the input directory command factory will send this info to us in the output parameter 
147                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
148                         if (inputDir == "not found"){   inputDir = "";          }
149                         else {
150                                 string path;
151                                 it = parameters.find("fasta");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("oligos");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
165                                 }
166                                 
167                                 it = parameters.find("qfile");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
173                                 }
174                                 
175                                 it = parameters.find("name");
176                                 //user has given a template file
177                                 if(it != parameters.end()){ 
178                                         path = m->hasPath(it->second);
179                                         //if the user has not given a path then, add inputdir. else leave path alone.
180                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
181                                 }
182                                 
183                         }
184
185                         
186                         //check for required parameters
187                         fastaFile = validParameter.validFile(parameters, "fasta", true);
188                         if (fastaFile == "not found") {                                 
189                                 fastaFile = m->getFastaFile(); 
190                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
191                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
192                         }else if (fastaFile == "not open") { abort = true; }    
193                         else { m->setFastaFile(fastaFile); }
194                         
195                         //if the user changes the output directory command factory will send this info to us in the output parameter 
196                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
197                                 outputDir = ""; 
198                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
199                         }
200                 
201                         
202                         //check for optional parameter and set defaults
203                         // ...at some point should added some additional type checking...
204                         string temp;
205                         temp = validParameter.validFile(parameters, "flip", false);
206                         if (temp == "not found")    {   flip = 0;       }
207                         else {  flip = m->isTrue(temp);         }
208                 
209                         temp = validParameter.validFile(parameters, "oligos", true);
210                         if (temp == "not found"){       oligoFile = "";         }
211                         else if(temp == "not open"){    abort = true;   } 
212                         else                                    {       oligoFile = temp; m->setOligosFile(oligoFile);          }
213                         
214                         
215                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
216                         convert(temp, maxAmbig);  
217
218                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
219                         convert(temp, maxHomoP);  
220
221                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
222                         convert(temp, minLength); 
223                         
224                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
225                         convert(temp, maxLength);
226                         
227                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
228                         convert(temp, bdiffs);
229                         
230                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
231                         convert(temp, pdiffs);
232                         
233                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
234                         convert(temp, tdiffs);
235                         
236                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
237                         
238                         temp = validParameter.validFile(parameters, "qfile", true);     
239                         if (temp == "not found")        {       qFileName = "";         }
240                         else if(temp == "not open")     {       abort = true;           }
241                         else                                            {       qFileName = temp;       m->setQualFile(qFileName); }
242                         
243                         temp = validParameter.validFile(parameters, "name", true);      
244                         if (temp == "not found")        {       nameFile = "";          }
245                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
246                         else                                            {       nameFile = temp;        m->setNameFile(nameFile); }
247                         
248                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
249                         convert(temp, qThreshold);
250                         
251                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
252                         qtrim = m->isTrue(temp);
253
254                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
255                         convert(temp, qRollAverage);
256
257                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
258                         convert(temp, qWindowAverage);
259
260                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
261                         convert(temp, qWindowSize);
262
263                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
264                         convert(temp, qWindowStep);
265
266                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
267                         convert(temp, qAverage);
268
269                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
270                         convert(temp, keepFirst);
271
272                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
273                         convert(temp, removeLast);
274                         
275                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
276                         allFiles = m->isTrue(temp);
277                         
278                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
279                         m->setProcessors(temp);
280                         convert(temp, processors); 
281                         
282                         
283                         if(allFiles && (oligoFile == "")){
284                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
285                         }
286                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
287                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
288                                 qAverage=0;
289                                 qThreshold=0;
290                         }
291                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
292                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
293                                 abort = true;
294                         }
295                 }
296
297         }
298         catch(exception& e) {
299                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
300                 exit(1);
301         }
302 }
303 //***************************************************************************************************************
304
305 int TrimSeqsCommand::execute(){
306         try{
307         
308                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
309                 
310                 numFPrimers = 0;  //this needs to be initialized
311                 numRPrimers = 0;
312                 createGroup = false;
313                 vector<vector<string> > fastaFileNames;
314                 vector<vector<string> > qualFileNames;
315                 vector<vector<string> > nameFileNames;
316                 
317                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
318                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
319                 
320                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
321                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
322                 
323                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
324                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
325                 
326                 if (qFileName != "") {
327                         outputNames.push_back(trimQualFile);
328                         outputNames.push_back(scrapQualFile);
329                         outputTypes["qfile"].push_back(trimQualFile);
330                         outputTypes["qfile"].push_back(scrapQualFile); 
331                 }
332                 
333                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
334                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
335                 
336                 if (nameFile != "") {
337                         m->readNames(nameFile, nameMap);
338                         outputNames.push_back(trimNameFile);
339                         outputNames.push_back(scrapNameFile);
340                         outputTypes["name"].push_back(trimNameFile);
341                         outputTypes["name"].push_back(scrapNameFile); 
342                 }
343                 
344                 if (m->control_pressed) { return 0; }
345                 
346                 string outputGroupFileName;
347                 if(oligoFile != ""){
348                         createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
349                         if (createGroup) {
350                                 outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
351                                 outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
352                         }
353                 }
354                 
355                 vector<unsigned long long> fastaFilePos;
356                 vector<unsigned long long> qFilePos;
357                 
358                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
359                 
360                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
361                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
362                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
363                 }       
364                 if(qFileName == "")     {       qLines = lines; } //files with duds
365                 
366                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
367                                 if(processors == 1){
368                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
369                                 }else{
370                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
371                                 }       
372                 #else
373                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
374                 #endif
375                 
376                 if (m->control_pressed) {  return 0; }                  
377         
378                 if(allFiles){
379                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
380                         map<string, string>::iterator it;
381                         set<string> namesToRemove;
382                         for(int i=0;i<fastaFileNames.size();i++){
383                                 for(int j=0;j<fastaFileNames[0].size();j++){
384                                         if (fastaFileNames[i][j] != "") {
385                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
386                                                         if(m->isBlank(fastaFileNames[i][j])){
387                                                                 m->mothurRemove(fastaFileNames[i][j]);
388                                                                 namesToRemove.insert(fastaFileNames[i][j]);
389                                                         
390                                                                 if(qFileName != ""){
391                                                                         m->mothurRemove(qualFileNames[i][j]);
392                                                                         namesToRemove.insert(qualFileNames[i][j]);
393                                                                 }
394                                                                 
395                                                                 if(nameFile != ""){
396                                                                         m->mothurRemove(nameFileNames[i][j]);
397                                                                         namesToRemove.insert(nameFileNames[i][j]);
398                                                                 }
399                                                         }else{  
400                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
401                                                                 if (it == uniqueFastaNames.end()) {     
402                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
403                                                                 }       
404                                                         }
405                                                 }
406                                         }
407                                 }
408                         }
409                         
410                         //remove names for outputFileNames, just cleans up the output
411                         vector<string> outputNames2;
412                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
413                         outputNames = outputNames2;
414                         
415                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
416                                 ifstream in;
417                                 m->openInputFile(it->first, in);
418                                 
419                                 ofstream out;
420                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
421                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
422                                 m->openOutputFile(thisGroupName, out);
423                                 
424                                 while (!in.eof()){
425                                         if (m->control_pressed) { break; }
426                                         
427                                         Sequence currSeq(in); m->gobble(in);
428                                         out << currSeq.getName() << '\t' << it->second << endl;
429                                 }
430                                 in.close();
431                                 out.close();
432                         }
433                 }
434                 
435                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
436
437                 //output group counts
438                 m->mothurOutEndLine();
439                 int total = 0;
440                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
441                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
442                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
443                 }
444                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
445                 
446                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0;    }
447
448                 //set fasta file as new current fastafile
449                 string current = "";
450                 itTypes = outputTypes.find("fasta");
451                 if (itTypes != outputTypes.end()) {
452                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
453                 }
454                 
455                 itTypes = outputTypes.find("name");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
458                 }
459                 
460                 itTypes = outputTypes.find("qfile");
461                 if (itTypes != outputTypes.end()) {
462                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
463                 }
464                 
465                 itTypes = outputTypes.find("group");
466                 if (itTypes != outputTypes.end()) {
467                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
468                 }
469
470                 m->mothurOutEndLine();
471                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
472                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
473                 m->mothurOutEndLine();
474                 
475                 return 0;       
476                         
477         }
478         catch(exception& e) {
479                 m->errorOut(e, "TrimSeqsCommand", "execute");
480                 exit(1);
481         }
482 }
483                 
484 /**************************************************************************************/
485
486 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
487                 
488         try {
489                 
490                 ofstream trimFASTAFile;
491                 m->openOutputFile(trimFileName, trimFASTAFile);
492                 
493                 ofstream scrapFASTAFile;
494                 m->openOutputFile(scrapFileName, scrapFASTAFile);
495                 
496                 ofstream trimQualFile;
497                 ofstream scrapQualFile;
498                 if(qFileName != ""){
499                         m->openOutputFile(trimQFileName, trimQualFile);
500                         m->openOutputFile(scrapQFileName, scrapQualFile);
501                 }
502                 
503                 ofstream trimNameFile;
504                 ofstream scrapNameFile;
505                 if(nameFile != ""){
506                         m->openOutputFile(trimNFileName, trimNameFile);
507                         m->openOutputFile(scrapNFileName, scrapNameFile);
508                 }
509                 
510                 
511                 ofstream outGroupsFile;
512                 if (createGroup){       m->openOutputFile(groupFileName, outGroupsFile);   }
513                 if(allFiles){
514                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
515                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
516                                         if (fastaFileNames[i][j] != "") {
517                                                 ofstream temp;
518                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
519                                                 if(qFileName != ""){
520                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
521                                                 }
522                                                 
523                                                 if(nameFile != ""){
524                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
525                                                 }
526                                         }
527                                 }
528                         }
529                 }
530                 
531                 ifstream inFASTA;
532                 m->openInputFile(filename, inFASTA);
533                 inFASTA.seekg(line->start);
534                 
535                 ifstream qFile;
536                 if(qFileName != "")     {
537                         m->openInputFile(qFileName, qFile);
538                         qFile.seekg(qline->start);  
539                 }
540                 
541                 int count = 0;
542                 bool moreSeqs = 1;
543                 TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
544         
545                 while (moreSeqs) {
546                                 
547                         if (m->control_pressed) { 
548                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
549                                 if (createGroup) {       outGroupsFile.close();   }
550
551                                 if(qFileName != ""){
552                                         qFile.close();
553                                 }
554                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }
555
556                                 return 0;
557                         }
558                         
559                         int success = 1;
560                         string trashCode = "";
561                         int currentSeqsDiffs = 0;
562
563                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
564                         //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl;
565                         QualityScores currQual;
566                         if(qFileName != ""){
567                                 currQual = QualityScores(qFile);  m->gobble(qFile);
568                         }
569                         
570                         string origSeq = currSeq.getUnaligned();
571                         if (origSeq != "") {
572                                 
573                                 int barcodeIndex = 0;
574                                 int primerIndex = 0;
575                                 
576                                 if(barcodes.size() != 0){
577                                         success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
578                                         if(success > bdiffs)            {       trashCode += 'b';       }
579                                         else{ currentSeqsDiffs += success;  }
580                                 }
581                                 
582                                 if(numFPrimers != 0){
583                                         success = trimOligos.stripForward(currSeq, currQual, primerIndex);
584                                         if(success > pdiffs)            {       trashCode += 'f';       }
585                                         else{ currentSeqsDiffs += success;  }
586                                 }
587                                 
588                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
589                                 
590                                 if(numRPrimers != 0){
591                                         success = trimOligos.stripReverse(currSeq, currQual);
592                                         if(!success)                            {       trashCode += 'r';       }
593                                 }
594
595                                 if(keepFirst != 0){
596                                         success = keepFirstTrim(currSeq, currQual);
597                                 }
598                                 
599                                 if(removeLast != 0){
600                                         success = removeLastTrim(currSeq, currQual);
601                                         if(!success)                            {       trashCode += 'l';       }
602                                 }
603
604                                 
605                                 if(qFileName != ""){
606                                         int origLength = currSeq.getNumBases();
607                                         
608                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
609                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
610                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
611                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
612                                         else                                            {       success = 1;                            }
613                                         
614                                         //you don't want to trim, if it fails above then scrap it
615                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
616                                         
617                                         if(!success)                            {       trashCode += 'q';       }
618                                 }                               
619                 
620                                 if(minLength > 0 || maxLength > 0){
621                                         success = cullLength(currSeq);
622                                         if(!success)                            {       trashCode += 'l';       }
623                                 }
624                                 if(maxHomoP > 0){
625                                         success = cullHomoP(currSeq);
626                                         if(!success)                            {       trashCode += 'h';       }
627                                 }
628                                 if(maxAmbig != -1){
629                                         success = cullAmbigs(currSeq);
630                                         if(!success)                            {       trashCode += 'n';       }
631                                 }
632                                 
633                                 if(flip){               // should go last                       
634                                         currSeq.reverseComplement();
635                                         if(qFileName != ""){
636                                                 currQual.flipQScores(); 
637                                         }
638                                 }
639                                 
640                                 if(trashCode.length() == 0){
641                                         currSeq.setAligned(currSeq.getUnaligned());
642                                         currSeq.printSequence(trimFASTAFile);
643                                         
644                                         if(qFileName != ""){
645                                                 currQual.printQScores(trimQualFile);
646                                         }
647                                         
648                                         if(nameFile != ""){
649                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
650                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
651                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
652                                         }
653                                         
654                                         if (createGroup) {
655                                                 if(barcodes.size() != 0){
656                                                         string thisGroup = barcodeNameVector[barcodeIndex];
657                                                         if (primers.size() != 0) { 
658                                                                 if (primerNameVector[primerIndex] != "") { 
659                                                                         if(thisGroup != "") {
660                                                                                 thisGroup += "." + primerNameVector[primerIndex]; 
661                                                                         }else {
662                                                                                 thisGroup = primerNameVector[primerIndex]; 
663                                                                         }
664                                                                 } 
665                                                         }
666                                                         
667                                                         outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
668                                                         
669                                                         if (nameFile != "") {
670                                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
671                                                                 if (itName != nameMap.end()) { 
672                                                                         vector<string> thisSeqsNames; 
673                                                                         m->splitAtChar(itName->second, thisSeqsNames, ',');
674                                                                         for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
675                                                                                 outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
676                                                                         }
677                                                                 }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
678                                                         }
679                                                         
680                                                         map<string, int>::iterator it = groupCounts.find(thisGroup);
681                                                         if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
682                                                         else { groupCounts[it->first]++; }
683                                                                 
684                                                 }
685                                         }
686                                         
687                                         if(allFiles){
688                                                 ofstream output;
689                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
690                                                 currSeq.printSequence(output);
691                                                 output.close();
692                                                 
693                                                 if(qFileName != ""){
694                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
695                                                         currQual.printQScores(output);
696                                                         output.close();                                                 
697                                                 }
698                                                 
699                                                 if(nameFile != ""){
700                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
701                                                         if (itName != nameMap.end()) { 
702                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
703                                                                 output << itName->first << '\t' << itName->second << endl; 
704                                                                 output.close();
705                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
706                                                 }
707                                         }
708                                 }
709                                 else{
710                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
711                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
712                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
713                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
714                                         }
715                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
716                                         currSeq.setUnaligned(origSeq);
717                                         currSeq.setAligned(origSeq);
718                                         currSeq.printSequence(scrapFASTAFile);
719                                         if(qFileName != ""){
720                                                 currQual.printQScores(scrapQualFile);
721                                         }
722                                 }
723                                 count++;
724                         }
725                         
726                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
727                                 unsigned long long pos = inFASTA.tellg();
728                                 if ((pos == -1) || (pos >= line->end)) { break; }
729                         
730                         #else
731                                 if (inFASTA.eof()) { break; }
732                         #endif
733                         
734                         //report progress
735                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
736                         
737                 }
738                 //report progress
739                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
740                 
741                 
742                 inFASTA.close();
743                 trimFASTAFile.close();
744                 scrapFASTAFile.close();
745                 if (createGroup) {       outGroupsFile.close();   }
746                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
747                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
748                 
749                 return count;
750         }
751         catch(exception& e) {
752                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
753                 exit(1);
754         }
755 }
756
757 /**************************************************************************************************/
758
759 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
760         try {
761 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
762                 int process = 1;
763                 int exitCommand = 1;
764                 processIDS.clear();
765                 
766                 //loop through and create all the processes you want
767                 while (process != processors) {
768                         int pid = fork();
769                         
770                         if (pid > 0) {
771                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
772                                 process++;
773                         }else if (pid == 0){
774                                 
775                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
776                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
777                                 vector<vector<string> > tempNameFileNames = nameFileNames;
778
779                                 if(allFiles){
780                                         ofstream temp;
781
782                                         for(int i=0;i<tempFASTAFileNames.size();i++){
783                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
784                                                         if (tempFASTAFileNames[i][j] != "") {
785                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
786                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
787
788                                                                 if(qFileName != ""){
789                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
790                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
791                                                                 }
792                                                                 if(nameFile != ""){
793                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
794                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
795                                                                 }
796                                                         }
797                                                 }
798                                         }
799                                 }
800                                                         
801                                 driverCreateTrim(filename,
802                                                                  qFileName,
803                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
804                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
805                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
806                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
807                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
808                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
809                                                                  (groupFile + toString(getpid()) + ".temp"),
810                                                                  tempFASTAFileNames,
811                                                                  tempPrimerQualFileNames,
812                                                                  tempNameFileNames,
813                                                                  lines[process],
814                                                                  qLines[process]);
815                                 
816                                 //pass groupCounts to parent
817                                 if(createGroup){
818                                         ofstream out;
819                                         string tempFile = filename + toString(getpid()) + ".num.temp";
820                                         m->openOutputFile(tempFile, out);
821                                         
822                                         out << groupCounts.size() << endl;
823                                         
824                                         for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
825                                                 out << it->first << '\t' << it->second << endl;
826                                         }
827                                         out.close();
828                                 }
829                                 exit(0);
830                         }else { 
831                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
832                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
833                                 exit(0);
834                         }
835                 }
836                 
837                 //parent do my part
838                 ofstream temp;
839                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
840                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
841                 if(qFileName != ""){
842                         m->openOutputFile(trimQualFileName, temp);              temp.close();
843                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
844                 }
845                 if (nameFile != "") {
846                         m->openOutputFile(trimNameFileName, temp);              temp.close();
847                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
848                 }
849
850                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
851                 
852                 //force parent to wait until all the processes are done
853                 for (int i=0;i<processIDS.size();i++) { 
854                         int temp = processIDS[i];
855                         wait(&temp);
856                 }
857                 
858                 //append files
859                 for(int i=0;i<processIDS.size();i++){
860                         
861                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
862                         
863                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
864                         m->mothurRemove((trimFASTAFileName + toString(processIDS[i]) + ".temp"));
865                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
866                         m->mothurRemove((scrapFASTAFileName + toString(processIDS[i]) + ".temp"));
867                         
868                         if(qFileName != ""){
869                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
870                                 m->mothurRemove((trimQualFileName + toString(processIDS[i]) + ".temp"));
871                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
872                                 m->mothurRemove((scrapQualFileName + toString(processIDS[i]) + ".temp"));
873                         }
874                         
875                         if(nameFile != ""){
876                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
877                                 m->mothurRemove((trimNameFileName + toString(processIDS[i]) + ".temp"));
878                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
879                                 m->mothurRemove((scrapNameFileName + toString(processIDS[i]) + ".temp"));
880                         }
881                         
882                         if(createGroup){
883                                 m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
884                                 m->mothurRemove((groupFile + toString(processIDS[i]) + ".temp"));
885                         }
886                         
887                         
888                         if(allFiles){
889                                 for(int j=0;j<fastaFileNames.size();j++){
890                                         for(int k=0;k<fastaFileNames[j].size();k++){
891                                                 if (fastaFileNames[j][k] != "") {
892                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
893                                                         m->mothurRemove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"));
894                                                         
895                                                         if(qFileName != ""){
896                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
897                                                                 m->mothurRemove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"));
898                                                         }
899                                                         
900                                                         if(nameFile != ""){
901                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
902                                                                 m->mothurRemove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"));
903                                                         }
904                                                 }
905                                         }
906                                 }
907                         }
908                         
909                         if(createGroup){
910                                 ifstream in;
911                                 string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
912                                 m->openInputFile(tempFile, in);
913                                 int tempNum;
914                                 string group;
915                                 
916                                 in >> tempNum; m->gobble(in);
917                                 
918                                 if (tempNum != 0) {
919                                         while (!in.eof()) { 
920                                                 in >> group >> tempNum; m->gobble(in);
921                                 
922                                                 map<string, int>::iterator it = groupCounts.find(group);
923                                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
924                                                 else { groupCounts[it->first] += tempNum; }
925                                         }
926                                 }
927                                 in.close(); m->mothurRemove(tempFile);
928                         }
929                         
930                 }
931         
932                 return exitCommand;
933 #endif          
934         }
935         catch(exception& e) {
936                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
937                 exit(1);
938         }
939 }
940
941 /**************************************************************************************************/
942
943 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long long>& fastaFilePos, vector<unsigned long long>& qfileFilePos) {
944         try {
945                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
946                 //set file positions for fasta file
947                 fastaFilePos = m->divideFile(filename, processors);
948                 
949                 if (qfilename == "") { return processors; }
950                 
951                 //get name of first sequence in each chunk
952                 map<string, int> firstSeqNames;
953                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
954                         ifstream in;
955                         m->openInputFile(filename, in);
956                         in.seekg(fastaFilePos[i]);
957                 
958                         Sequence temp(in); 
959                         firstSeqNames[temp.getName()] = i;
960                 
961                         in.close();
962                 }
963                                 
964                 //seach for filePos of each first name in the qfile and save in qfileFilePos
965                 ifstream inQual;
966                 m->openInputFile(qfilename, inQual);
967                 
968                 string input;
969                 while(!inQual.eof()){   
970                         input = m->getline(inQual);
971
972                         if (input.length() != 0) {
973                                 if(input[0] == '>'){ //this is a sequence name line
974                                         istringstream nameStream(input);
975                                         
976                                         string sname = "";  nameStream >> sname;
977                                         sname = sname.substr(1);
978                                         
979                                         map<string, int>::iterator it = firstSeqNames.find(sname);
980                                         
981                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
982                                                 unsigned long long pos = inQual.tellg(); 
983                                                 qfileFilePos.push_back(pos - input.length() - 1);       
984                                                 firstSeqNames.erase(it);
985                                         }
986                                 }
987                         }
988                         
989                         if (firstSeqNames.size() == 0) { break; }
990                 }
991                 inQual.close();
992                 
993                 
994                 if (firstSeqNames.size() != 0) { 
995                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
996                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
997                         }
998                         qFileName = "";
999                         return processors;
1000                 }
1001
1002                 //get last file position of qfile
1003                 FILE * pFile;
1004                 unsigned long long size;
1005                 
1006                 //get num bytes in file
1007                 pFile = fopen (qfilename.c_str(),"rb");
1008                 if (pFile==NULL) perror ("Error opening file");
1009                 else{
1010                         fseek (pFile, 0, SEEK_END);
1011                         size=ftell (pFile);
1012                         fclose (pFile);
1013                 }
1014                 
1015                 qfileFilePos.push_back(size);
1016                 
1017                 return processors;
1018                 
1019                 #else
1020                 
1021                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
1022                         fastaFilePos.push_back(1000); qfileFilePos.push_back(1000);
1023                         return 1;
1024                 
1025                 #endif
1026         }
1027         catch(exception& e) {
1028                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1029                 exit(1);
1030         }
1031 }
1032
1033 //***************************************************************************************************************
1034
1035 bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1036         try {
1037                 ifstream inOligos;
1038                 m->openInputFile(oligoFile, inOligos);
1039                 
1040                 ofstream test;
1041                 
1042                 string type, oligo, group;
1043
1044                 int indexPrimer = 0;
1045                 int indexBarcode = 0;
1046                 
1047                 while(!inOligos.eof()){
1048
1049                         inOligos >> type; 
1050                                         
1051                         if(type[0] == '#'){
1052                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1053                                 m->gobble(inOligos);
1054                         }
1055                         else{
1056                                 m->gobble(inOligos);
1057                                 //make type case insensitive
1058                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1059                                 
1060                                 inOligos >> oligo;
1061                                 
1062                                 for(int i=0;i<oligo.length();i++){
1063                                         oligo[i] = toupper(oligo[i]);
1064                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1065                                 }
1066                                 
1067                                 if(type == "FORWARD"){
1068                                         group = "";
1069                                         
1070                                         // get rest of line in case there is a primer name
1071                                         while (!inOligos.eof()) {       
1072                                                 char c = inOligos.get(); 
1073                                                 if (c == 10 || c == 13){        break;  }
1074                                                 else if (c == 32 || c == 9){;} //space or tab
1075                                                 else {  group += c;  }
1076                                         } 
1077                                         
1078                                         //check for repeat barcodes
1079                                         map<string, int>::iterator itPrime = primers.find(oligo);
1080                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1081                                         
1082                                         primers[oligo]=indexPrimer; indexPrimer++;              
1083                                         primerNameVector.push_back(group);
1084                                 }
1085                                 else if(type == "REVERSE"){
1086                                         Sequence oligoRC("reverse", oligo);
1087                                         oligoRC.reverseComplement();
1088                                         revPrimer.push_back(oligoRC.getUnaligned());
1089                                 }
1090                                 else if(type == "BARCODE"){
1091                                         inOligos >> group;
1092                                         
1093                                         //check for repeat barcodes
1094                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1095                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1096                                                 
1097                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1098                                         barcodeNameVector.push_back(group);
1099                                 }
1100                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1101                         }
1102                         m->gobble(inOligos);
1103                 }       
1104                 inOligos.close();
1105                 
1106                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1107                 
1108                 //add in potential combos
1109                 if(barcodeNameVector.size() == 0){
1110                         barcodes[""] = 0;
1111                         barcodeNameVector.push_back("");                        
1112                 }
1113                 
1114                 if(primerNameVector.size() == 0){
1115                         primers[""] = 0;
1116                         primerNameVector.push_back("");                 
1117                 }
1118                 
1119                 fastaFileNames.resize(barcodeNameVector.size());
1120                 for(int i=0;i<fastaFileNames.size();i++){
1121                         fastaFileNames[i].assign(primerNameVector.size(), "");
1122                 }
1123                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1124                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1125                 
1126                 if(allFiles){
1127                         set<string> uniqueNames; //used to cleanup outputFileNames
1128                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1129                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1130                                         
1131                                         string primerName = primerNameVector[itPrimer->second];
1132                                         string barcodeName = barcodeNameVector[itBar->second];
1133                                         
1134                                         string comboGroupName = "";
1135                                         string fastaFileName = "";
1136                                         string qualFileName = "";
1137                                         string nameFileName = "";
1138                                         
1139                                         if(primerName == ""){
1140                                                 comboGroupName = barcodeNameVector[itBar->second];
1141                                         }
1142                                         else{
1143                                                 if(barcodeName == ""){
1144                                                         comboGroupName = primerNameVector[itPrimer->second];
1145                                                 }
1146                                                 else{
1147                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1148                                                 }
1149                                         }
1150                                         
1151                                         
1152                                         ofstream temp;
1153                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1154                                         if (uniqueNames.count(fastaFileName) == 0) {
1155                                                 outputNames.push_back(fastaFileName);
1156                                                 outputTypes["fasta"].push_back(fastaFileName);
1157                                                 uniqueNames.insert(fastaFileName);
1158                                         }
1159                                         
1160                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1161                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1162                                         
1163                                         if(qFileName != ""){
1164                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1165                                                 if (uniqueNames.count(qualFileName) == 0) {
1166                                                         outputNames.push_back(qualFileName);
1167                                                         outputTypes["qfile"].push_back(qualFileName);
1168                                                 }
1169                                                 
1170                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1171                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1172                                         }
1173                                         
1174                                         if(nameFile != ""){
1175                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1176                                                 if (uniqueNames.count(nameFileName) == 0) {
1177                                                         outputNames.push_back(nameFileName);
1178                                                         outputTypes["name"].push_back(nameFileName);
1179                                                 }
1180                                                 
1181                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1182                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1183                                         }
1184                                         
1185                                 }
1186                         }
1187                 }
1188                 numFPrimers = primers.size();
1189                 numRPrimers = revPrimer.size();
1190                 
1191                 bool allBlank = true;
1192                 for (int i = 0; i < barcodeNameVector.size(); i++) {
1193                         if (barcodeNameVector[i] != "") {
1194                                 allBlank = false;
1195                                 break;
1196                         }
1197                 }
1198                 for (int i = 0; i < primerNameVector.size(); i++) {
1199                         if (primerNameVector[i] != "") {
1200                                 allBlank = false;
1201                                 break;
1202                         }
1203                 }
1204                 
1205                 if (allBlank) {
1206                         m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
1207                         allFiles = false;
1208                         return false;
1209                 }
1210                 
1211                 return true;
1212                 
1213         }
1214         catch(exception& e) {
1215                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1216                 exit(1);
1217         }
1218 }
1219 //***************************************************************************************************************
1220
1221 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1222         try {
1223                 bool success = 1;
1224                 if(qscores.getName() != ""){
1225                         qscores.trimQScores(-1, keepFirst);
1226                 }
1227                 sequence.trim(keepFirst);
1228                 return success;
1229         }
1230         catch(exception& e) {
1231                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1232                 exit(1);
1233         }
1234         
1235 }       
1236
1237 //***************************************************************************************************************
1238
1239 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1240         try {
1241                 bool success = 0;
1242                 
1243                 int length = sequence.getNumBases() - removeLast;
1244                 
1245                 if(length > 0){
1246                         if(qscores.getName() != ""){
1247                                 qscores.trimQScores(-1, length);
1248                         }
1249                         sequence.trim(length);
1250                         success = 1;
1251                 }
1252                 else{
1253                         success = 0;
1254                 }
1255
1256                 return success;
1257         }
1258         catch(exception& e) {
1259                 m->errorOut(e, "removeLastTrim", "countDiffs");
1260                 exit(1);
1261         }
1262         
1263 }       
1264
1265 //***************************************************************************************************************
1266
1267 bool TrimSeqsCommand::cullLength(Sequence& seq){
1268         try {
1269         
1270                 int length = seq.getNumBases();
1271                 bool success = 0;       //guilty until proven innocent
1272                 
1273                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1274                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1275                 else                                                                                            {       success = 0;    }
1276                 
1277                 return success;
1278         
1279         }
1280         catch(exception& e) {
1281                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1282                 exit(1);
1283         }
1284         
1285 }
1286
1287 //***************************************************************************************************************
1288
1289 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1290         try {
1291                 int longHomoP = seq.getLongHomoPolymer();
1292                 bool success = 0;       //guilty until proven innocent
1293                 
1294                 if(longHomoP <= maxHomoP){      success = 1;    }
1295                 else                                    {       success = 0;    }
1296                 
1297                 return success;
1298         }
1299         catch(exception& e) {
1300                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1301                 exit(1);
1302         }
1303         
1304 }
1305
1306 //***************************************************************************************************************
1307
1308 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1309         try {
1310                 int numNs = seq.getAmbigBases();
1311                 bool success = 0;       //guilty until proven innocent
1312                 
1313                 if(numNs <= maxAmbig)   {       success = 1;    }
1314                 else                                    {       success = 0;    }
1315                 
1316                 return success;
1317         }
1318         catch(exception& e) {
1319                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1320                 exit(1);
1321         }
1322         
1323 }
1324 //***************************************************************************************************************