]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
fixed bug with trim.seqs allfiles=t
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
20                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
21                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
22                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
23                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
24                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
25                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
26                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
27                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
29                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
30                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
31                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
32                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
33                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
34                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
35                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
36                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
37                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
38                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
39                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
40                         
41                 vector<string> myArray;
42                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
43                 return myArray;
44         }
45         catch(exception& e) {
46                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
47                 exit(1);
48         }
49 }
50 //**********************************************************************************************************************
51 string TrimSeqsCommand::getHelpString(){        
52         try {
53                 string helpString = "";
54                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
55                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
56                 helpString += "The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
57                 helpString += "The fasta parameter is required.\n";
58                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
59                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
60                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
61                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
62                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
63                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
64                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
65                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
66                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
67                 helpString += "The qfile parameter allows you to provide a quality file.\n";
68                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
69                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
70                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
71                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
72                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
73                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
74                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
75                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
76                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
77                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
78                 helpString += "The trim.seqs command should be in the following format: \n";
79                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
80                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
81                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
82                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
83                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
84                 return helpString;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
88                 exit(1);
89         }
90 }
91
92
93 //**********************************************************************************************************************
94
95 TrimSeqsCommand::TrimSeqsCommand(){     
96         try {
97                 abort = true; calledHelp = true; 
98                 setParameters();
99                 vector<string> tempOutNames;
100                 outputTypes["fasta"] = tempOutNames;
101                 outputTypes["qfile"] = tempOutNames;
102                 outputTypes["group"] = tempOutNames;
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
106                 exit(1);
107         }
108 }
109 //***************************************************************************************************************
110
111 TrimSeqsCommand::TrimSeqsCommand(string option)  {
112         try {
113                 
114                 abort = false; calledHelp = false;   
115                 comboStarts = 0;
116                 
117                 //allow user to run help
118                 if(option == "help") { help(); abort = true; calledHelp = true; }
119                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
120                 
121                 else {
122                         vector<string> myArray = setParameters();
123                         
124                         OptionParser parser(option);
125                         map<string,string> parameters = parser.getParameters();
126                         
127                         ValidParameters validParameter;
128                         map<string,string>::iterator it;
129                         
130                         //check to make sure all parameters are valid for command
131                         for (it = parameters.begin(); it != parameters.end(); it++) { 
132                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
133                         }
134                         
135                         //initialize outputTypes
136                         vector<string> tempOutNames;
137                         outputTypes["fasta"] = tempOutNames;
138                         outputTypes["qfile"] = tempOutNames;
139                         outputTypes["group"] = tempOutNames;
140                         
141                         //if the user changes the input directory command factory will send this info to us in the output parameter 
142                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
143                         if (inputDir == "not found"){   inputDir = "";          }
144                         else {
145                                 string path;
146                                 it = parameters.find("fasta");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
152                                 }
153                                 
154                                 it = parameters.find("oligos");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
160                                 }
161                                 
162                                 it = parameters.find("qfile");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
168                                 }
169                                 
170                         }
171
172                         
173                         //check for required parameters
174                         fastaFile = validParameter.validFile(parameters, "fasta", true);
175                         if (fastaFile == "not found") {                                 
176                                 fastaFile = m->getFastaFile(); 
177                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
178                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
179                         }else if (fastaFile == "not open") { abort = true; }    
180                         
181                         //if the user changes the output directory command factory will send this info to us in the output parameter 
182                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
183                                 outputDir = ""; 
184                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
185                         }
186                 
187                         
188                         //check for optional parameter and set defaults
189                         // ...at some point should added some additional type checking...
190                         string temp;
191                         temp = validParameter.validFile(parameters, "flip", false);
192                         if (temp == "not found"){       flip = 0;       }
193                         else if(m->isTrue(temp))        {       flip = 1;       }
194                 
195                         temp = validParameter.validFile(parameters, "oligos", true);
196                         if (temp == "not found"){       oligoFile = "";         }
197                         else if(temp == "not open"){    abort = true;   } 
198                         else                                    {       oligoFile = temp;               }
199                         
200                         
201                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
202                         convert(temp, maxAmbig);  
203
204                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
205                         convert(temp, maxHomoP);  
206
207                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
208                         convert(temp, minLength); 
209                         
210                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
211                         convert(temp, maxLength);
212                         
213                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
214                         convert(temp, bdiffs);
215                         
216                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
217                         convert(temp, pdiffs);
218                         
219                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
220                         convert(temp, tdiffs);
221                         
222                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
223                         
224                         temp = validParameter.validFile(parameters, "qfile", true);     
225                         if (temp == "not found")        {       qFileName = "";         }
226                         else if(temp == "not open")     {       abort = true;           }
227                         else                                            {       qFileName = temp;       }
228                         
229                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
230                         convert(temp, qThreshold);
231                         
232                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
233                         qtrim = m->isTrue(temp);
234
235                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
236                         convert(temp, qRollAverage);
237
238                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
239                         convert(temp, qWindowAverage);
240
241                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
242                         convert(temp, qWindowSize);
243
244                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
245                         convert(temp, qWindowStep);
246
247                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
248                         convert(temp, qAverage);
249
250                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
251                         convert(temp, keepFirst);
252
253                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
254                         convert(temp, removeLast);
255                         
256                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
257                         allFiles = m->isTrue(temp);
258                         
259                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
260                         m->setProcessors(temp);
261                         convert(temp, processors); 
262                         
263                         
264                         if(allFiles && (oligoFile == "")){
265                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
266                         }
267                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
268                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
269                                 qAverage=0;
270                                 qThreshold=0;
271                         }
272                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
273                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
274                                 abort = true;
275                         }
276                 }
277
278         }
279         catch(exception& e) {
280                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
281                 exit(1);
282         }
283 }
284 //***************************************************************************************************************
285
286 int TrimSeqsCommand::execute(){
287         try{
288         
289                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
290                 
291                 numFPrimers = 0;  //this needs to be initialized
292                 numRPrimers = 0;
293                 vector<vector<string> > fastaFileNames;
294                 vector<vector<string> > qualFileNames;
295                 
296                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
297                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
298                 
299                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
300                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
301                 
302                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
303                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
304                 if (qFileName != "") {
305                         outputNames.push_back(trimQualFile);
306                         outputNames.push_back(scrapQualFile);
307                         outputTypes["qfile"].push_back(trimQualFile);
308                         outputTypes["qfile"].push_back(scrapQualFile); 
309                 }
310                 
311                 string outputGroupFileName;
312                 if(oligoFile != ""){
313                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
314                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
315                         getOligos(fastaFileNames, qualFileNames);
316                 }
317                 
318                 vector<unsigned long int> fastaFilePos;
319                 vector<unsigned long int> qFilePos;
320                 
321                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
322                 
323                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
324                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
325                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
326                 }       
327                 if(qFileName == "")     {       qLines = lines; } //files with duds
328                 
329                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
330                                 if(processors == 1){
331                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
332                                 }else{
333                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames); 
334                                 }       
335                 #else
336                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, outputGroupFileName, fastaFileNames, qualFileNames, lines[0], qLines[0]);
337                 #endif
338                 
339                 if (m->control_pressed) {  return 0; }                  
340         
341                 if(allFiles){
342                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
343                         map<string, string>::iterator it;
344                         set<string> namesToRemove;
345                         for(int i=0;i<fastaFileNames.size();i++){
346                                 for(int j=0;j<fastaFileNames[0].size();j++){
347                                         if (fastaFileNames[i][j] != "") {
348                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
349                                                         if(m->isBlank(fastaFileNames[i][j])){
350                                                                 remove(fastaFileNames[i][j].c_str());
351                                                                 namesToRemove.insert(fastaFileNames[i][j]);
352                                                         
353                                                                 if(qFileName != ""){
354                                                                         remove(qualFileNames[i][j].c_str());
355                                                                         namesToRemove.insert(qualFileNames[i][j]);
356                                                                 }
357                                                         }else{  
358                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
359                                                                 if (it == uniqueFastaNames.end()) {     
360                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
361                                                                 }       
362                                                         }
363                                                 }
364                                         }
365                                 }
366                         }
367                         
368                         //remove names for outputFileNames, just cleans up the output
369                         vector<string> outputNames2;
370                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
371                         outputNames = outputNames2;
372                         
373                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
374                                 ifstream in;
375                                 m->openInputFile(it->first, in);
376                                 
377                                 ofstream out;
378                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
379                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
380                                 m->openOutputFile(thisGroupName, out);
381                                 
382                                 while (!in.eof()){
383                                         if (m->control_pressed) { break; }
384                                         
385                                         Sequence currSeq(in); m->gobble(in);
386                                         out << currSeq.getName() << '\t' << it->second << endl;
387                                 }
388                                 in.close();
389                                 out.close();
390                         }
391                 }
392                 
393                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
394
395                 //output group counts
396                 m->mothurOutEndLine();
397                 int total = 0;
398                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
399                          total += it->second; m->mothurOut("Group " + it->first + " contains " + toString(it->second) + " sequences."); m->mothurOutEndLine(); 
400                 }
401                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
402                 
403                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
404
405                 //set fasta file as new current fastafile
406                 string current = "";
407                 itTypes = outputTypes.find("fasta");
408                 if (itTypes != outputTypes.end()) {
409                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
410                 }
411                 
412                 itTypes = outputTypes.find("qfile");
413                 if (itTypes != outputTypes.end()) {
414                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
415                 }
416                 
417                 itTypes = outputTypes.find("group");
418                 if (itTypes != outputTypes.end()) {
419                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
420                 }
421
422                 m->mothurOutEndLine();
423                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
424                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
425                 m->mothurOutEndLine();
426                 
427                 return 0;       
428                         
429         }
430         catch(exception& e) {
431                 m->errorOut(e, "TrimSeqsCommand", "execute");
432                 exit(1);
433         }
434 }
435                 
436 /**************************************************************************************/
437
438 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, linePair* line, linePair* qline) {        
439                 
440         try {
441                 
442                 ofstream trimFASTAFile;
443                 m->openOutputFile(trimFileName, trimFASTAFile);
444                 
445                 ofstream scrapFASTAFile;
446                 m->openOutputFile(scrapFileName, scrapFASTAFile);
447                 
448                 ofstream trimQualFile;
449                 ofstream scrapQualFile;
450                 if(qFileName != ""){
451                         m->openOutputFile(trimQFileName, trimQualFile);
452                         m->openOutputFile(scrapQFileName, scrapQualFile);
453                 }
454                 
455                 ofstream outGroupsFile;
456                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
457                 if(allFiles){
458                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
459                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
460                                         if (fastaFileNames[i][j] != "") {
461                                                 ofstream temp;
462                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
463                                                 if(qFileName != ""){
464                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
465                                                 }
466                                         }
467                                 }
468                         }
469                 }
470                 
471                 ifstream inFASTA;
472                 m->openInputFile(filename, inFASTA);
473                 inFASTA.seekg(line->start);
474                 
475                 ifstream qFile;
476                 if(qFileName != "")     {
477                         m->openInputFile(qFileName, qFile);
478                         qFile.seekg(qline->start);  
479                 }
480                 
481                 int count = 0;
482                 bool moreSeqs = 1;
483         
484                 while (moreSeqs) {
485                                 
486                         if (m->control_pressed) { 
487                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
488                                 if (oligoFile != "") {   outGroupsFile.close();   }
489
490                                 if(qFileName != ""){
491                                         qFile.close();
492                                 }
493                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
494
495                                 return 0;
496                         }
497                         
498                         int success = 1;
499                         string trashCode = "";
500                         int currentSeqsDiffs = 0;
501
502                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
503                         
504                         QualityScores currQual;
505                         if(qFileName != ""){
506                                 currQual = QualityScores(qFile);  m->gobble(qFile);
507                         }
508
509                         string origSeq = currSeq.getUnaligned();
510                         if (origSeq != "") {
511                                 
512                                 int barcodeIndex = 0;
513                                 int primerIndex = 0;
514                                 
515                                 if(barcodes.size() != 0){
516                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
517                                         if(success > bdiffs)            {       trashCode += 'b';       }
518                                         else{ currentSeqsDiffs += success;  }
519                                 }
520                                 
521                                 if(numFPrimers != 0){
522                                         success = stripForward(currSeq, currQual, primerIndex);
523                                         if(success > pdiffs)            {       trashCode += 'f';       }
524                                         else{ currentSeqsDiffs += success;  }
525                                 }
526                                 
527                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
528                                 
529                                 if(numRPrimers != 0){
530                                         success = stripReverse(currSeq, currQual);
531                                         if(!success)                            {       trashCode += 'r';       }
532                                 }
533
534                                 if(keepFirst != 0){
535                                         success = keepFirstTrim(currSeq, currQual);
536                                 }
537                                 
538                                 if(removeLast != 0){
539                                         success = removeLastTrim(currSeq, currQual);
540                                         if(!success)                            {       trashCode += 'l';       }
541                                 }
542
543                                 
544                                 if(qFileName != ""){
545                                         int origLength = currSeq.getNumBases();
546                                         
547                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
548                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
549                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
550                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
551                                         else                                            {       success = 1;                            }
552                                 
553                                         //you don't want to trim, if it fails above then scrap it
554                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
555                                         
556                                         if(!success)                            {       trashCode += 'q';       }
557                                 }                               
558                 
559                                 if(minLength > 0 || maxLength > 0){
560                                         success = cullLength(currSeq);
561                                         if(!success)                            {       trashCode += 'l';       }
562                                 }
563                                 if(maxHomoP > 0){
564                                         success = cullHomoP(currSeq);
565                                         if(!success)                            {       trashCode += 'h';       }
566                                 }
567                                 if(maxAmbig != -1){
568                                         success = cullAmbigs(currSeq);
569                                         if(!success)                            {       trashCode += 'n';       }
570                                 }
571                                 
572                                 if(flip){               // should go last                       
573                                         currSeq.reverseComplement();
574                                         if(qFileName != ""){
575                                                 currQual.flipQScores(); 
576                                         }
577                                 }
578                                 
579                                 if(trashCode.length() == 0){
580                                         currSeq.setAligned(currSeq.getUnaligned());
581                                         currSeq.printSequence(trimFASTAFile);
582                                         
583                                         if(qFileName != ""){
584                                                 currQual.printQScores(trimQualFile);
585                                         }
586                                         
587                                         if(barcodes.size() != 0){
588                                                 string thisGroup = barcodeNameVector[barcodeIndex];
589                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
590                                                 
591                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
592                                                 
593                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
594                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
595                                                 else { groupCounts[it->first]++; }
596                                                         
597                                         }
598                                         
599                                         
600                                         if(allFiles){
601                                                 ofstream output;
602                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
603                                                 currSeq.printSequence(output);
604                                                 output.close();
605                                                 
606                                                 if(qFileName != ""){
607                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
608                                                         currQual.printQScores(output);
609                                                         output.close();                                                 
610                                                 }
611                                         }
612                                 }
613                                 else{
614                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
615                                         currSeq.setUnaligned(origSeq);
616                                         currSeq.setAligned(origSeq);
617                                         currSeq.printSequence(scrapFASTAFile);
618                                         if(qFileName != ""){
619                                                 currQual.printQScores(scrapQualFile);
620                                         }
621                                 }
622                                 count++;
623                         }
624                         
625                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
626                                 unsigned long int pos = inFASTA.tellg();
627                                 if ((pos == -1) || (pos >= line->end)) { break; }
628                         #else
629                                 if (inFASTA.eof()) { break; }
630                         #endif
631                                 
632                         //report progress
633                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
634                         
635                 }
636                 //report progress
637                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
638
639                 
640                 inFASTA.close();
641                 trimFASTAFile.close();
642                 scrapFASTAFile.close();
643                 if (oligoFile != "") {   outGroupsFile.close();   }
644                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
645                 
646                 return count;
647         }
648         catch(exception& e) {
649                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
650                 exit(1);
651         }
652 }
653
654 /**************************************************************************************************/
655
656 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames) {
657         try {
658 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
659                 int process = 1;
660                 int exitCommand = 1;
661                 processIDS.clear();
662                 
663                 //loop through and create all the processes you want
664                 while (process != processors) {
665                         int pid = fork();
666                         
667                         if (pid > 0) {
668                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
669                                 process++;
670                         }else if (pid == 0){
671                                 
672                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
673                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
674
675                                 if(allFiles){
676                                         ofstream temp;
677
678                                         for(int i=0;i<tempFASTAFileNames.size();i++){
679                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
680                                                         if (tempFASTAFileNames[i][j] != "") {
681                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
682                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
683
684                                                                 if(qFileName != ""){
685                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
686                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
687                                                                 }
688                                                         }
689                                                 }
690                                         }
691                                 }
692                                                         
693                                 driverCreateTrim(filename,
694                                                                  qFileName,
695                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
696                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
697                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
698                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
699                                                                  (groupFile + toString(getpid()) + ".temp"),
700                                                                  tempFASTAFileNames,
701                                                                  tempPrimerQualFileNames,
702                                                                  lines[process],
703                                                                  qLines[process]);
704                                 
705                                 //pass groupCounts to parent
706                                 ofstream out;
707                                 string tempFile = filename + toString(getpid()) + ".num.temp";
708                                 m->openOutputFile(tempFile, out);
709                                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
710                                         out << it->first << '\t' << it->second << endl;
711                                 }
712                                 out.close();
713                                 
714                                 exit(0);
715                         }else { 
716                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
717                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
718                                 exit(0);
719                         }
720                 }
721                 
722                 //parent do my part
723                 ofstream temp;
724                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
725                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
726                 m->openOutputFile(trimQualFileName, temp);              temp.close();
727                 m->openOutputFile(scrapQualFileName, temp);             temp.close();
728
729                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, groupFile, fastaFileNames, qualFileNames, lines[0], qLines[0]);
730                 
731                 //force parent to wait until all the processes are done
732                 for (int i=0;i<processIDS.size();i++) { 
733                         int temp = processIDS[i];
734                         wait(&temp);
735                 }
736                 
737                 //append files
738                 for(int i=0;i<processIDS.size();i++){
739                         
740                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
741                         
742                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
743                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
744                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
745                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
746                         
747                         if(qFileName != ""){
748                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
749                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
750                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
751                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
752                         }
753                         
754                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
755                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
756                         
757                         
758                         if(allFiles){
759                                 for(int j=0;j<fastaFileNames.size();j++){
760                                         for(int k=0;k<fastaFileNames[j].size();k++){
761                                                 if (fastaFileNames[j][k] != "") {
762                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
763                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
764                                                         
765                                                         if(qFileName != ""){
766                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
767                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
768                                                         }
769                                                 }
770                                         }
771                                 }
772                         }
773                         
774                         ifstream in;
775                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
776                         m->openInputFile(tempFile, in);
777                         int tempNum;
778                         string group;
779                         while (!in.eof()) { 
780                                 in >> group >> tempNum; m->gobble(in);
781                                 
782                                 map<string, int>::iterator it = groupCounts.find(group);
783                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
784                                 else { groupCounts[it->first] += tempNum; }
785                         }
786                         in.close(); remove(tempFile.c_str());
787                         
788                 }
789         
790                 return exitCommand;
791 #endif          
792         }
793         catch(exception& e) {
794                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
795                 exit(1);
796         }
797 }
798
799 /**************************************************************************************************/
800
801 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
802         try {
803                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
804                 //set file positions for fasta file
805                 fastaFilePos = m->divideFile(filename, processors);
806                 
807                 if (qfilename == "") { return processors; }
808                 
809                 //get name of first sequence in each chunk
810                 map<string, int> firstSeqNames;
811                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
812                         ifstream in;
813                         m->openInputFile(filename, in);
814                         in.seekg(fastaFilePos[i]);
815                 
816                         Sequence temp(in); 
817                         firstSeqNames[temp.getName()] = i;
818                 
819                         in.close();
820                 }
821                                 
822                 //seach for filePos of each first name in the qfile and save in qfileFilePos
823                 ifstream inQual;
824                 m->openInputFile(qfilename, inQual);
825                 
826                 string input;
827                 while(!inQual.eof()){   
828                         input = m->getline(inQual);
829
830                         if (input.length() != 0) {
831                                 if(input[0] == '>'){ //this is a sequence name line
832                                         istringstream nameStream(input);
833                                         
834                                         string sname = "";  nameStream >> sname;
835                                         sname = sname.substr(1);
836                                         
837                                         map<string, int>::iterator it = firstSeqNames.find(sname);
838                                         
839                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
840                                                 unsigned long int pos = inQual.tellg(); 
841                                                 qfileFilePos.push_back(pos - input.length() - 1);       
842                                                 firstSeqNames.erase(it);
843                                         }
844                                 }
845                         }
846                         
847                         if (firstSeqNames.size() == 0) { break; }
848                 }
849                 inQual.close();
850                 
851                 
852                 if (firstSeqNames.size() != 0) { 
853                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
854                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
855                         }
856                         qFileName = "";
857                         return processors;
858                 }
859
860                 //get last file position of qfile
861                 FILE * pFile;
862                 unsigned long int size;
863                 
864                 //get num bytes in file
865                 pFile = fopen (qfilename.c_str(),"rb");
866                 if (pFile==NULL) perror ("Error opening file");
867                 else{
868                         fseek (pFile, 0, SEEK_END);
869                         size=ftell (pFile);
870                         fclose (pFile);
871                 }
872                 
873                 qfileFilePos.push_back(size);
874                 
875                 return processors;
876                 
877                 #else
878                 
879                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
880                         //get last file position of fastafile
881                         FILE * pFile;
882                         unsigned long int size;
883                         
884                         //get num bytes in file
885                         pFile = fopen (filename.c_str(),"rb");
886                         if (pFile==NULL) perror ("Error opening file");
887                         else{
888                                 fseek (pFile, 0, SEEK_END);
889                                 size=ftell (pFile);
890                                 fclose (pFile);
891                         }
892                         fastaFilePos.push_back(size);
893                         
894                         //get last file position of fastafile
895                         FILE * qFile;
896                         
897                         //get num bytes in file
898                         qFile = fopen (qfilename.c_str(),"rb");
899                         if (qFile==NULL) perror ("Error opening file");
900                         else{
901                                 fseek (qFile, 0, SEEK_END);
902                                 size=ftell (qFile);
903                                 fclose (qFile);
904                         }
905                         qfileFilePos.push_back(size);
906                 
907                         return 1;
908                 
909                 #endif
910         }
911         catch(exception& e) {
912                 m->errorOut(e, "TrimSeqsCommand", "setLines");
913                 exit(1);
914         }
915 }
916
917 //***************************************************************************************************************
918
919 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames){
920         try {
921                 ifstream inOligos;
922                 m->openInputFile(oligoFile, inOligos);
923                 
924                 ofstream test;
925                 
926                 string type, oligo, group;
927
928                 int indexPrimer = 0;
929                 int indexBarcode = 0;
930                 
931                 while(!inOligos.eof()){
932
933                         inOligos >> type; 
934                                         
935                         if(type[0] == '#'){
936                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
937                                 m->gobble(inOligos);
938                         }
939                         else{
940                                 m->gobble(inOligos);
941                                 //make type case insensitive
942                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
943                                 
944                                 inOligos >> oligo;
945                                 
946                                 for(int i=0;i<oligo.length();i++){
947                                         oligo[i] = toupper(oligo[i]);
948                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
949                                 }
950                                 
951                                 if(type == "FORWARD"){
952                                         group = "";
953                                         
954                                         // get rest of line in case there is a primer name
955                                         while (!inOligos.eof()) {       
956                                                 char c = inOligos.get(); 
957                                                 if (c == 10 || c == 13){        break;  }
958                                                 else if (c == 32 || c == 9){;} //space or tab
959                                                 else {  group += c;  }
960                                         } 
961                                         
962                                         //check for repeat barcodes
963                                         map<string, int>::iterator itPrime = primers.find(oligo);
964                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
965                                         
966                                         primers[oligo]=indexPrimer; indexPrimer++;              
967                                         primerNameVector.push_back(group);
968                                 }
969                                 else if(type == "REVERSE"){
970                                         Sequence oligoRC("reverse", oligo);
971                                         oligoRC.reverseComplement();
972                                         revPrimer.push_back(oligoRC.getUnaligned());
973                                 }
974                                 else if(type == "BARCODE"){
975                                         inOligos >> group;
976                                         
977                                         //check for repeat barcodes
978                                         map<string, int>::iterator itBar = barcodes.find(oligo);
979                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
980                                                 
981                                         barcodes[oligo]=indexBarcode; indexBarcode++;
982                                         barcodeNameVector.push_back(group);
983                                 }
984                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
985                         }
986                         m->gobble(inOligos);
987                 }       
988                 inOligos.close();
989                 
990                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
991                 
992                 //add in potential combos
993                 if(barcodeNameVector.size() == 0){
994                         barcodes[""] = 0;
995                         barcodeNameVector.push_back("");                        
996                 }
997                 
998                 if(primerNameVector.size() == 0){
999                         primers[""] = 0;
1000                         primerNameVector.push_back("");                 
1001                 }
1002                 
1003                 fastaFileNames.resize(barcodeNameVector.size());
1004                 for(int i=0;i<fastaFileNames.size();i++){
1005                         fastaFileNames[i].assign(primerNameVector.size(), "");
1006                 }
1007                 if(qFileName != ""){    qualFileNames = fastaFileNames; }
1008                 
1009                 if(allFiles){
1010                         set<string> uniqueNames; //used to cleanup outputFileNames
1011                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1012                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1013                                         
1014                                         string primerName = primerNameVector[itPrimer->second];
1015                                         string barcodeName = barcodeNameVector[itBar->second];
1016                                         
1017                                         string comboGroupName = "";
1018                                         string fastaFileName = "";
1019                                         string qualFileName = "";
1020                                         
1021                                         if(primerName == ""){
1022                                                 comboGroupName = barcodeNameVector[itBar->second];
1023                                         }
1024                                         else{
1025                                                 if(barcodeName == ""){
1026                                                         comboGroupName = primerNameVector[itPrimer->second];
1027                                                 }
1028                                                 else{
1029                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1030                                                 }
1031                                         }
1032                                         
1033                                         if ((comboGroupName == "") || (comboGroupName == ".")) {}
1034                                         else {
1035                                                 ofstream temp;
1036                                                 fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1037                                                 if (uniqueNames.count(fastaFileName) == 0) {
1038                                                         outputNames.push_back(fastaFileName);
1039                                                         outputTypes["fasta"].push_back(fastaFileName);
1040                                                         uniqueNames.insert(fastaFileName);
1041                                                 }
1042                                                 
1043                                                 fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1044                                                 m->openOutputFile(fastaFileName, temp);         temp.close();
1045                                                 
1046                                                 if(qFileName != ""){
1047                                                         qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1048                                                         if (uniqueNames.count(fastaFileName) == 0) {
1049                                                                 outputNames.push_back(qualFileName);
1050                                                                 outputTypes["qfile"].push_back(qualFileName);
1051                                                         }
1052                                                         
1053                                                         qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1054                                                         m->openOutputFile(qualFileName, temp);          temp.close();
1055                                                 }
1056                                         }
1057                                 }
1058                         }
1059                 }
1060                 numFPrimers = primers.size();
1061                 numRPrimers = revPrimer.size();
1062
1063         }
1064         catch(exception& e) {
1065                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1066                 exit(1);
1067         }
1068 }
1069
1070 //***************************************************************************************************************
1071
1072 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1073         try {
1074                 
1075                 string rawSequence = seq.getUnaligned();
1076                 int success = bdiffs + 1;       //guilty until proven innocent
1077                 
1078                 //can you find the barcode
1079                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1080                         string oligo = it->first;
1081                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1082                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1083                                 break;  
1084                         }
1085                         
1086                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1087                                 group = it->second;
1088                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1089                                 
1090                                 if(qual.getName() != ""){
1091                                         qual.trimQScores(oligo.length(), -1);
1092                                 }
1093                                 
1094                                 success = 0;
1095                                 break;
1096                         }
1097                 }
1098                 
1099                 //if you found the barcode or if you don't want to allow for diffs
1100                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1101                 
1102                 else { //try aligning and see if you can find it
1103
1104                         int maxLength = 0;
1105
1106                         Alignment* alignment;
1107                         if (barcodes.size() > 0) {
1108                                 map<string,int>::iterator it=barcodes.begin();
1109
1110                                 for(it;it!=barcodes.end();it++){
1111                                         if(it->first.length() > maxLength){
1112                                                 maxLength = it->first.length();
1113                                         }
1114                                 }
1115                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1116
1117                         }else{ alignment = NULL; } 
1118                         
1119                         //can you find the barcode
1120                         int minDiff = 1e6;
1121                         int minCount = 1;
1122                         int minGroup = -1;
1123                         int minPos = 0;
1124                         
1125                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1126                                 string oligo = it->first;
1127 //                              int length = oligo.length();
1128                                 
1129                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1130                                         success = bdiffs + 10;
1131                                         break;
1132                                 }
1133                                 
1134                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1135                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1136                                 oligo = alignment->getSeqAAln();
1137                                 string temp = alignment->getSeqBAln();
1138                 
1139                                 int alnLength = oligo.length();
1140                                 
1141                                 for(int i=oligo.length()-1;i>=0;i--){
1142                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1143                                 }
1144                                 oligo = oligo.substr(0,alnLength);
1145                                 temp = temp.substr(0,alnLength);
1146                                 
1147                                 int numDiff = countDiffs(oligo, temp);
1148                                 
1149                                 if(numDiff < minDiff){
1150                                         minDiff = numDiff;
1151                                         minCount = 1;
1152                                         minGroup = it->second;
1153                                         minPos = 0;
1154                                         for(int i=0;i<alnLength;i++){
1155                                                 if(temp[i] != '-'){
1156                                                         minPos++;
1157                                                 }
1158                                         }
1159                                 }
1160                                 else if(numDiff == minDiff){
1161                                         minCount++;
1162                                 }
1163
1164                         }
1165
1166                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1167                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1168                         else{                                                                                                   //use the best match
1169                                 group = minGroup;
1170                                 seq.setUnaligned(rawSequence.substr(minPos));
1171                                 
1172                                 if(qual.getName() != ""){
1173                                         qual.trimQScores(minPos, -1);
1174                                 }
1175                                 success = minDiff;
1176                         }
1177                         
1178                         if (alignment != NULL) {  delete alignment;  }
1179                         
1180                 }
1181                 
1182                 return success;
1183                 
1184         }
1185         catch(exception& e) {
1186                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1187                 exit(1);
1188         }
1189
1190 }
1191
1192 //***************************************************************************************************************
1193
1194 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1195         try {
1196                 string rawSequence = seq.getUnaligned();
1197                 int success = pdiffs + 1;       //guilty until proven innocent
1198                 
1199                 //can you find the primer
1200                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1201                         string oligo = it->first;
1202                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1203                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1204                                 break;  
1205                         }
1206                         
1207                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1208                                 group = it->second;
1209                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1210                                 if(qual.getName() != ""){
1211                                         qual.trimQScores(oligo.length(), -1);
1212                                 }
1213                                 success = 0;
1214                                 break;
1215                         }
1216                 }
1217
1218                 //if you found the barcode or if you don't want to allow for diffs
1219                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1220                 
1221                 else { //try aligning and see if you can find it
1222
1223                         int maxLength = 0;
1224
1225                         Alignment* alignment;
1226                         if (primers.size() > 0) {
1227                                 map<string,int>::iterator it=primers.begin();
1228
1229                                 for(it;it!=primers.end();it++){
1230                                         if(it->first.length() > maxLength){
1231                                                 maxLength = it->first.length();
1232                                         }
1233                                 }
1234                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1235
1236                         }else{ alignment = NULL; } 
1237                         
1238                         //can you find the barcode
1239                         int minDiff = 1e6;
1240                         int minCount = 1;
1241                         int minGroup = -1;
1242                         int minPos = 0;
1243                         
1244                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1245                                 string oligo = it->first;
1246 //                              int length = oligo.length();
1247                                 
1248                                 if(rawSequence.length() < maxLength){   
1249                                         success = pdiffs + 100;
1250                                         break;
1251                                 }
1252                                 
1253                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1254                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1255                                 oligo = alignment->getSeqAAln();
1256                                 string temp = alignment->getSeqBAln();
1257                 
1258                                 int alnLength = oligo.length();
1259                                 
1260                                 for(int i=oligo.length()-1;i>=0;i--){
1261                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1262                                 }
1263                                 oligo = oligo.substr(0,alnLength);
1264                                 temp = temp.substr(0,alnLength);
1265                                 
1266                                 int numDiff = countDiffs(oligo, temp);
1267                                 
1268                                 if(numDiff < minDiff){
1269                                         minDiff = numDiff;
1270                                         minCount = 1;
1271                                         minGroup = it->second;
1272                                         minPos = 0;
1273                                         for(int i=0;i<alnLength;i++){
1274                                                 if(temp[i] != '-'){
1275                                                         minPos++;
1276                                                 }
1277                                         }
1278                                 }
1279                                 else if(numDiff == minDiff){
1280                                         minCount++;
1281                                 }
1282
1283                         }
1284
1285                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1286                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1287                         else{                                                                                                   //use the best match
1288                                 group = minGroup;
1289                                 seq.setUnaligned(rawSequence.substr(minPos));
1290                                 if(qual.getName() != ""){
1291                                         qual.trimQScores(minPos, -1);
1292                                 }
1293                                 success = minDiff;
1294                         }
1295                         
1296                         if (alignment != NULL) {  delete alignment;  }
1297                         
1298                 }
1299                 
1300                 return success;
1301
1302         }
1303         catch(exception& e) {
1304                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1305                 exit(1);
1306         }
1307 }
1308
1309 //***************************************************************************************************************
1310
1311 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1312         try {
1313                 string rawSequence = seq.getUnaligned();
1314                 bool success = 0;       //guilty until proven innocent
1315                 
1316                 for(int i=0;i<numRPrimers;i++){
1317                         string oligo = revPrimer[i];
1318                         
1319                         if(rawSequence.length() < oligo.length()){
1320                                 success = 0;
1321                                 break;
1322                         }
1323                         
1324                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1325                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1326                                 if(qual.getName() != ""){
1327                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1328                                 }
1329                                 success = 1;
1330                                 break;
1331                         }
1332                 }       
1333                 return success;
1334                 
1335         }
1336         catch(exception& e) {
1337                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1338                 exit(1);
1339         }
1340 }
1341
1342 //***************************************************************************************************************
1343
1344 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1345         try {
1346                 bool success = 1;
1347                 if(qscores.getName() != ""){
1348                         qscores.trimQScores(-1, keepFirst);
1349                 }
1350                 sequence.trim(keepFirst);
1351                 return success;
1352         }
1353         catch(exception& e) {
1354                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1355                 exit(1);
1356         }
1357         
1358 }       
1359
1360 //***************************************************************************************************************
1361
1362 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1363         try {
1364                 bool success = 0;
1365                 
1366                 int length = sequence.getNumBases() - removeLast;
1367                 
1368                 if(length > 0){
1369                         if(qscores.getName() != ""){
1370                                 qscores.trimQScores(-1, length);
1371                         }
1372                         sequence.trim(length);
1373                         success = 1;
1374                 }
1375                 else{
1376                         success = 0;
1377                 }
1378
1379                 return success;
1380         }
1381         catch(exception& e) {
1382                 m->errorOut(e, "removeLastTrim", "countDiffs");
1383                 exit(1);
1384         }
1385         
1386 }       
1387
1388 //***************************************************************************************************************
1389
1390 bool TrimSeqsCommand::cullLength(Sequence& seq){
1391         try {
1392         
1393                 int length = seq.getNumBases();
1394                 bool success = 0;       //guilty until proven innocent
1395                 
1396                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1397                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1398                 else                                                                                            {       success = 0;    }
1399                 
1400                 return success;
1401         
1402         }
1403         catch(exception& e) {
1404                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1405                 exit(1);
1406         }
1407         
1408 }
1409
1410 //***************************************************************************************************************
1411
1412 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1413         try {
1414                 int longHomoP = seq.getLongHomoPolymer();
1415                 bool success = 0;       //guilty until proven innocent
1416                 
1417                 if(longHomoP <= maxHomoP){      success = 1;    }
1418                 else                                    {       success = 0;    }
1419                 
1420                 return success;
1421         }
1422         catch(exception& e) {
1423                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1424                 exit(1);
1425         }
1426         
1427 }
1428
1429 //***************************************************************************************************************
1430
1431 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1432         try {
1433                 int numNs = seq.getAmbigBases();
1434                 bool success = 0;       //guilty until proven innocent
1435                 
1436                 if(numNs <= maxAmbig)   {       success = 1;    }
1437                 else                                    {       success = 0;    }
1438                 
1439                 return success;
1440         }
1441         catch(exception& e) {
1442                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1443                 exit(1);
1444         }
1445         
1446 }
1447
1448 //***************************************************************************************************************
1449
1450 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1451         try {
1452                 bool success = 1;
1453                 int length = oligo.length();
1454                 
1455                 for(int i=0;i<length;i++){
1456                         
1457                         if(oligo[i] != seq[i]){
1458                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1459                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1460                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1461                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1462                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1463                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1464                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1465                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1466                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1467                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1468                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1469                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1470                                 
1471                                 if(success == 0)        {       break;   }
1472                         }
1473                         else{
1474                                 success = 1;
1475                         }
1476                 }
1477                 
1478                 return success;
1479         }
1480         catch(exception& e) {
1481                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1482                 exit(1);
1483         }
1484
1485 }
1486
1487 //***************************************************************************************************************
1488
1489 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1490         try {
1491
1492                 int length = oligo.length();
1493                 int countDiffs = 0;
1494                 
1495                 for(int i=0;i<length;i++){
1496                                                                 
1497                         if(oligo[i] != seq[i]){
1498                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1499                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1500                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1501                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1502                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1503                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1504                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1505                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1506                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1507                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1508                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1509                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1510                         }
1511                         
1512                 }
1513                 
1514                 return countDiffs;
1515         }
1516         catch(exception& e) {
1517                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1518                 exit(1);
1519         }
1520
1521 }
1522
1523 //***************************************************************************************************************