]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
paralellized seq.error and dist.shared added some error checks to libshuff and dist...
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         
193                         //if the user changes the output directory command factory will send this info to us in the output parameter 
194                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
195                                 outputDir = ""; 
196                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
197                         }
198                 
199                         
200                         //check for optional parameter and set defaults
201                         // ...at some point should added some additional type checking...
202                         string temp;
203                         temp = validParameter.validFile(parameters, "flip", false);
204                         if (temp == "not found"){       flip = 0;       }
205                         else if(m->isTrue(temp))        {       flip = 1;       }
206                 
207                         temp = validParameter.validFile(parameters, "oligos", true);
208                         if (temp == "not found"){       oligoFile = "";         }
209                         else if(temp == "not open"){    abort = true;   } 
210                         else                                    {       oligoFile = temp;               }
211                         
212                         
213                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
214                         convert(temp, maxAmbig);  
215
216                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
217                         convert(temp, maxHomoP);  
218
219                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
220                         convert(temp, minLength); 
221                         
222                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
223                         convert(temp, maxLength);
224                         
225                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
226                         convert(temp, bdiffs);
227                         
228                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
229                         convert(temp, pdiffs);
230                         
231                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
232                         convert(temp, tdiffs);
233                         
234                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
235                         
236                         temp = validParameter.validFile(parameters, "qfile", true);     
237                         if (temp == "not found")        {       qFileName = "";         }
238                         else if(temp == "not open")     {       abort = true;           }
239                         else                                            {       qFileName = temp;       }
240                         
241                         temp = validParameter.validFile(parameters, "name", true);      
242                         if (temp == "not found")        {       nameFile = "";          }
243                         else if(temp == "not open")     {       nameFile = "";  abort = true;           }
244                         else                                            {       nameFile = temp;        }
245                         
246                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
247                         convert(temp, qThreshold);
248                         
249                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
250                         qtrim = m->isTrue(temp);
251
252                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
253                         convert(temp, qRollAverage);
254
255                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
256                         convert(temp, qWindowAverage);
257
258                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
259                         convert(temp, qWindowSize);
260
261                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
262                         convert(temp, qWindowStep);
263
264                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
265                         convert(temp, qAverage);
266
267                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
268                         convert(temp, keepFirst);
269
270                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
271                         convert(temp, removeLast);
272                         
273                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
274                         allFiles = m->isTrue(temp);
275                         
276                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
277                         m->setProcessors(temp);
278                         convert(temp, processors); 
279                         
280                         
281                         if(allFiles && (oligoFile == "")){
282                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
283                         }
284                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
285                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
286                                 qAverage=0;
287                                 qThreshold=0;
288                         }
289                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
290                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
291                                 abort = true;
292                         }
293                 }
294
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
298                 exit(1);
299         }
300 }
301 //***************************************************************************************************************
302
303 int TrimSeqsCommand::execute(){
304         try{
305         
306                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
307                 
308                 numFPrimers = 0;  //this needs to be initialized
309                 numRPrimers = 0;
310                 vector<vector<string> > fastaFileNames;
311                 vector<vector<string> > qualFileNames;
312                 vector<vector<string> > nameFileNames;
313                 
314                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
315                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
316                 
317                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
318                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
319                 
320                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
321                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
322                 
323                 if (qFileName != "") {
324                         outputNames.push_back(trimQualFile);
325                         outputNames.push_back(scrapQualFile);
326                         outputTypes["qfile"].push_back(trimQualFile);
327                         outputTypes["qfile"].push_back(scrapQualFile); 
328                 }
329                 
330                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
331                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
332                 
333                 if (nameFile != "") {
334                         m->readNames(nameFile, nameMap);
335                         outputNames.push_back(trimNameFile);
336                         outputNames.push_back(scrapNameFile);
337                         outputTypes["name"].push_back(trimNameFile);
338                         outputTypes["name"].push_back(scrapNameFile); 
339                 }
340                 
341                 if (m->control_pressed) { return 0; }
342                 
343                 string outputGroupFileName;
344                 if(oligoFile != ""){
345                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
346                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
347                         getOligos(fastaFileNames, qualFileNames, nameFileNames);
348                 }
349                 
350                 vector<unsigned long int> fastaFilePos;
351                 vector<unsigned long int> qFilePos;
352                 
353                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
354                 
355                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
356                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
357                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
358                 }       
359                 if(qFileName == "")     {       qLines = lines; } //files with duds
360                 
361                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
362                                 if(processors == 1){
363                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
364                                 }else{
365                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
366                                 }       
367                 #else
368                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
369                 #endif
370                 
371                 if (m->control_pressed) {  return 0; }                  
372         
373                 if(allFiles){
374                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
375                         map<string, string>::iterator it;
376                         set<string> namesToRemove;
377                         for(int i=0;i<fastaFileNames.size();i++){
378                                 for(int j=0;j<fastaFileNames[0].size();j++){
379                                         if (fastaFileNames[i][j] != "") {
380                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
381                                                         if(m->isBlank(fastaFileNames[i][j])){
382                                                                 remove(fastaFileNames[i][j].c_str());
383                                                                 namesToRemove.insert(fastaFileNames[i][j]);
384                                                         
385                                                                 if(qFileName != ""){
386                                                                         remove(qualFileNames[i][j].c_str());
387                                                                         namesToRemove.insert(qualFileNames[i][j]);
388                                                                 }
389                                                                 
390                                                                 if(nameFile != ""){
391                                                                         remove(nameFileNames[i][j].c_str());
392                                                                         namesToRemove.insert(nameFileNames[i][j]);
393                                                                 }
394                                                         }else{  
395                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
396                                                                 if (it == uniqueFastaNames.end()) {     
397                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
398                                                                 }       
399                                                         }
400                                                 }
401                                         }
402                                 }
403                         }
404                         
405                         //remove names for outputFileNames, just cleans up the output
406                         vector<string> outputNames2;
407                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
408                         outputNames = outputNames2;
409                         
410                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
411                                 ifstream in;
412                                 m->openInputFile(it->first, in);
413                                 
414                                 ofstream out;
415                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
416                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
417                                 m->openOutputFile(thisGroupName, out);
418                                 
419                                 while (!in.eof()){
420                                         if (m->control_pressed) { break; }
421                                         
422                                         Sequence currSeq(in); m->gobble(in);
423                                         out << currSeq.getName() << '\t' << it->second << endl;
424                                 }
425                                 in.close();
426                                 out.close();
427                         }
428                 }
429                 
430                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
431
432                 //output group counts
433                 m->mothurOutEndLine();
434                 int total = 0;
435                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
436                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
437                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
438                 }
439                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
440                 
441                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
442
443                 //set fasta file as new current fastafile
444                 string current = "";
445                 itTypes = outputTypes.find("fasta");
446                 if (itTypes != outputTypes.end()) {
447                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
448                 }
449                 
450                 itTypes = outputTypes.find("name");
451                 if (itTypes != outputTypes.end()) {
452                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
453                 }
454                 
455                 itTypes = outputTypes.find("qfile");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
458                 }
459                 
460                 itTypes = outputTypes.find("group");
461                 if (itTypes != outputTypes.end()) {
462                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
463                 }
464
465                 m->mothurOutEndLine();
466                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
467                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
468                 m->mothurOutEndLine();
469                 
470                 return 0;       
471                         
472         }
473         catch(exception& e) {
474                 m->errorOut(e, "TrimSeqsCommand", "execute");
475                 exit(1);
476         }
477 }
478                 
479 /**************************************************************************************/
480
481 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
482                 
483         try {
484                 
485                 ofstream trimFASTAFile;
486                 m->openOutputFile(trimFileName, trimFASTAFile);
487                 
488                 ofstream scrapFASTAFile;
489                 m->openOutputFile(scrapFileName, scrapFASTAFile);
490                 
491                 ofstream trimQualFile;
492                 ofstream scrapQualFile;
493                 if(qFileName != ""){
494                         m->openOutputFile(trimQFileName, trimQualFile);
495                         m->openOutputFile(scrapQFileName, scrapQualFile);
496                 }
497                 
498                 ofstream trimNameFile;
499                 ofstream scrapNameFile;
500                 if(nameFile != ""){
501                         m->openOutputFile(trimNFileName, trimNameFile);
502                         m->openOutputFile(scrapNFileName, scrapNameFile);
503                 }
504                 
505                 
506                 ofstream outGroupsFile;
507                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
508                 if(allFiles){
509                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
510                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
511                                         if (fastaFileNames[i][j] != "") {
512                                                 ofstream temp;
513                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
514                                                 if(qFileName != ""){
515                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
516                                                 }
517                                                 
518                                                 if(nameFile != ""){
519                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
520                                                 }
521                                         }
522                                 }
523                         }
524                 }
525                 
526                 ifstream inFASTA;
527                 m->openInputFile(filename, inFASTA);
528                 inFASTA.seekg(line->start);
529                 
530                 ifstream qFile;
531                 if(qFileName != "")     {
532                         m->openInputFile(qFileName, qFile);
533                         qFile.seekg(qline->start);  
534                 }
535                 
536                 int count = 0;
537                 bool moreSeqs = 1;
538         
539                 while (moreSeqs) {
540                                 
541                         if (m->control_pressed) { 
542                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
543                                 if (oligoFile != "") {   outGroupsFile.close();   }
544
545                                 if(qFileName != ""){
546                                         qFile.close();
547                                 }
548                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
549
550                                 return 0;
551                         }
552                         
553                         int success = 1;
554                         string trashCode = "";
555                         int currentSeqsDiffs = 0;
556
557                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
558                         
559                         QualityScores currQual;
560                         if(qFileName != ""){
561                                 currQual = QualityScores(qFile);  m->gobble(qFile);
562                         }
563
564                         string origSeq = currSeq.getUnaligned();
565                         if (origSeq != "") {
566                                 
567                                 int barcodeIndex = 0;
568                                 int primerIndex = 0;
569                                 
570                                 if(barcodes.size() != 0){
571                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
572                                         if(success > bdiffs)            {       trashCode += 'b';       }
573                                         else{ currentSeqsDiffs += success;  }
574                                 }
575                                 
576                                 if(numFPrimers != 0){
577                                         success = stripForward(currSeq, currQual, primerIndex);
578                                         if(success > pdiffs)            {       trashCode += 'f';       }
579                                         else{ currentSeqsDiffs += success;  }
580                                 }
581                                 
582                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
583                                 
584                                 if(numRPrimers != 0){
585                                         success = stripReverse(currSeq, currQual);
586                                         if(!success)                            {       trashCode += 'r';       }
587                                 }
588
589                                 if(keepFirst != 0){
590                                         success = keepFirstTrim(currSeq, currQual);
591                                 }
592                                 
593                                 if(removeLast != 0){
594                                         success = removeLastTrim(currSeq, currQual);
595                                         if(!success)                            {       trashCode += 'l';       }
596                                 }
597
598                                 
599                                 if(qFileName != ""){
600                                         int origLength = currSeq.getNumBases();
601                                         
602                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
603                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
604                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
605                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
606                                         else                                            {       success = 1;                            }
607                                 
608                                         //you don't want to trim, if it fails above then scrap it
609                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
610                                         
611                                         if(!success)                            {       trashCode += 'q';       }
612                                 }                               
613                 
614                                 if(minLength > 0 || maxLength > 0){
615                                         success = cullLength(currSeq);
616                                         if(!success)                            {       trashCode += 'l';       }
617                                 }
618                                 if(maxHomoP > 0){
619                                         success = cullHomoP(currSeq);
620                                         if(!success)                            {       trashCode += 'h';       }
621                                 }
622                                 if(maxAmbig != -1){
623                                         success = cullAmbigs(currSeq);
624                                         if(!success)                            {       trashCode += 'n';       }
625                                 }
626                                 
627                                 if(flip){               // should go last                       
628                                         currSeq.reverseComplement();
629                                         if(qFileName != ""){
630                                                 currQual.flipQScores(); 
631                                         }
632                                 }
633                                 
634                                 if(trashCode.length() == 0){
635                                         currSeq.setAligned(currSeq.getUnaligned());
636                                         currSeq.printSequence(trimFASTAFile);
637                                         
638                                         if(qFileName != ""){
639                                                 currQual.printQScores(trimQualFile);
640                                         }
641                                         
642                                         if(nameFile != ""){
643                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
644                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
645                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
646                                         }
647                                         
648                                         if(barcodes.size() != 0){
649                                                 string thisGroup = barcodeNameVector[barcodeIndex];
650                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
651                                                 
652                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
653                                                 
654                                                 if (nameFile != "") {
655                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
656                                                         if (itName != nameMap.end()) { 
657                                                                 vector<string> thisSeqsNames; 
658                                                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
659                                                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
660                                                                         outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
661                                                                 }
662                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
663                                                 }
664                                                 
665                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
666                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
667                                                 else { groupCounts[it->first]++; }
668                                                         
669                                         }
670                                         
671                                         
672                                         if(allFiles){
673                                                 ofstream output;
674                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
675                                                 currSeq.printSequence(output);
676                                                 output.close();
677                                                 
678                                                 if(qFileName != ""){
679                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
680                                                         currQual.printQScores(output);
681                                                         output.close();                                                 
682                                                 }
683                                                 
684                                                 if(nameFile != ""){
685                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
686                                                         if (itName != nameMap.end()) { 
687                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
688                                                                 output << itName->first << '\t' << itName->second << endl; 
689                                                                 output.close();
690                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
691                                                 }
692                                         }
693                                 }
694                                 else{
695                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
696                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
697                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
698                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
699                                         }
700                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
701                                         currSeq.setUnaligned(origSeq);
702                                         currSeq.setAligned(origSeq);
703                                         currSeq.printSequence(scrapFASTAFile);
704                                         if(qFileName != ""){
705                                                 currQual.printQScores(scrapQualFile);
706                                         }
707                                 }
708                                 count++;
709                         }
710                         
711                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
712                                 unsigned long int pos = inFASTA.tellg();
713                                 if ((pos == -1) || (pos >= line->end)) { break; }
714                         #else
715                                 if (inFASTA.eof()) { break; }
716                         #endif
717                                 
718                         //report progress
719                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
720                         
721                 }
722                 //report progress
723                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
724
725                 
726                 inFASTA.close();
727                 trimFASTAFile.close();
728                 scrapFASTAFile.close();
729                 if (oligoFile != "") {   outGroupsFile.close();   }
730                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
731                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
732                 
733                 return count;
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
737                 exit(1);
738         }
739 }
740
741 /**************************************************************************************************/
742
743 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
744         try {
745 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
746                 int process = 1;
747                 int exitCommand = 1;
748                 processIDS.clear();
749                 
750                 //loop through and create all the processes you want
751                 while (process != processors) {
752                         int pid = fork();
753                         
754                         if (pid > 0) {
755                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
756                                 process++;
757                         }else if (pid == 0){
758                                 
759                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
760                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
761                                 vector<vector<string> > tempNameFileNames = nameFileNames;
762
763                                 if(allFiles){
764                                         ofstream temp;
765
766                                         for(int i=0;i<tempFASTAFileNames.size();i++){
767                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
768                                                         if (tempFASTAFileNames[i][j] != "") {
769                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
770                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
771
772                                                                 if(qFileName != ""){
773                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
774                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
775                                                                 }
776                                                                 if(nameFile != ""){
777                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
778                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
779                                                                 }
780                                                         }
781                                                 }
782                                         }
783                                 }
784                                                         
785                                 driverCreateTrim(filename,
786                                                                  qFileName,
787                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
788                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
789                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
790                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
791                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
792                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
793                                                                  (groupFile + toString(getpid()) + ".temp"),
794                                                                  tempFASTAFileNames,
795                                                                  tempPrimerQualFileNames,
796                                                                  tempNameFileNames,
797                                                                  lines[process],
798                                                                  qLines[process]);
799                                 
800                                 //pass groupCounts to parent
801                                 ofstream out;
802                                 string tempFile = filename + toString(getpid()) + ".num.temp";
803                                 m->openOutputFile(tempFile, out);
804                                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
805                                         out << it->first << '\t' << it->second << endl;
806                                 }
807                                 out.close();
808                                 
809                                 exit(0);
810                         }else { 
811                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
812                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
813                                 exit(0);
814                         }
815                 }
816                 
817                 //parent do my part
818                 ofstream temp;
819                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
820                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
821                 if(qFileName != ""){
822                         m->openOutputFile(trimQualFileName, temp);              temp.close();
823                         m->openOutputFile(scrapQualFileName, temp);             temp.close();
824                 }
825                 if (nameFile != "") {
826                         m->openOutputFile(trimNameFileName, temp);              temp.close();
827                         m->openOutputFile(scrapNameFileName, temp);             temp.close();
828                 }
829
830                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
831                 
832                 //force parent to wait until all the processes are done
833                 for (int i=0;i<processIDS.size();i++) { 
834                         int temp = processIDS[i];
835                         wait(&temp);
836                 }
837                 
838                 //append files
839                 for(int i=0;i<processIDS.size();i++){
840                         
841                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
842                         
843                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
844                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
845                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
846                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
847                         
848                         if(qFileName != ""){
849                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
850                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
851                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
852                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
853                         }
854                         
855                         if(nameFile != ""){
856                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
857                                 remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
858                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
859                                 remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
860                         }
861                         
862                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
863                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
864                         
865                         
866                         if(allFiles){
867                                 for(int j=0;j<fastaFileNames.size();j++){
868                                         for(int k=0;k<fastaFileNames[j].size();k++){
869                                                 if (fastaFileNames[j][k] != "") {
870                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
871                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
872                                                         
873                                                         if(qFileName != ""){
874                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
875                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
876                                                         }
877                                                         
878                                                         if(nameFile != ""){
879                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
880                                                                 remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
881                                                         }
882                                                 }
883                                         }
884                                 }
885                         }
886                         
887                         ifstream in;
888                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
889                         m->openInputFile(tempFile, in);
890                         int tempNum;
891                         string group;
892                         while (!in.eof()) { 
893                                 in >> group >> tempNum; m->gobble(in);
894                                 
895                                 map<string, int>::iterator it = groupCounts.find(group);
896                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
897                                 else { groupCounts[it->first] += tempNum; }
898                         }
899                         in.close(); remove(tempFile.c_str());
900                         
901                 }
902         
903                 return exitCommand;
904 #endif          
905         }
906         catch(exception& e) {
907                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
908                 exit(1);
909         }
910 }
911
912 /**************************************************************************************************/
913
914 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
915         try {
916                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
917                 //set file positions for fasta file
918                 fastaFilePos = m->divideFile(filename, processors);
919                 
920                 if (qfilename == "") { return processors; }
921                 
922                 //get name of first sequence in each chunk
923                 map<string, int> firstSeqNames;
924                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
925                         ifstream in;
926                         m->openInputFile(filename, in);
927                         in.seekg(fastaFilePos[i]);
928                 
929                         Sequence temp(in); 
930                         firstSeqNames[temp.getName()] = i;
931                 
932                         in.close();
933                 }
934                                 
935                 //seach for filePos of each first name in the qfile and save in qfileFilePos
936                 ifstream inQual;
937                 m->openInputFile(qfilename, inQual);
938                 
939                 string input;
940                 while(!inQual.eof()){   
941                         input = m->getline(inQual);
942
943                         if (input.length() != 0) {
944                                 if(input[0] == '>'){ //this is a sequence name line
945                                         istringstream nameStream(input);
946                                         
947                                         string sname = "";  nameStream >> sname;
948                                         sname = sname.substr(1);
949                                         
950                                         map<string, int>::iterator it = firstSeqNames.find(sname);
951                                         
952                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
953                                                 unsigned long int pos = inQual.tellg(); 
954                                                 qfileFilePos.push_back(pos - input.length() - 1);       
955                                                 firstSeqNames.erase(it);
956                                         }
957                                 }
958                         }
959                         
960                         if (firstSeqNames.size() == 0) { break; }
961                 }
962                 inQual.close();
963                 
964                 
965                 if (firstSeqNames.size() != 0) { 
966                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
967                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
968                         }
969                         qFileName = "";
970                         return processors;
971                 }
972
973                 //get last file position of qfile
974                 FILE * pFile;
975                 unsigned long int size;
976                 
977                 //get num bytes in file
978                 pFile = fopen (qfilename.c_str(),"rb");
979                 if (pFile==NULL) perror ("Error opening file");
980                 else{
981                         fseek (pFile, 0, SEEK_END);
982                         size=ftell (pFile);
983                         fclose (pFile);
984                 }
985                 
986                 qfileFilePos.push_back(size);
987                 
988                 return processors;
989                 
990                 #else
991                 
992                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
993                         //get last file position of fastafile
994                         FILE * pFile;
995                         unsigned long int size;
996                         
997                         //get num bytes in file
998                         pFile = fopen (filename.c_str(),"rb");
999                         if (pFile==NULL) perror ("Error opening file");
1000                         else{
1001                                 fseek (pFile, 0, SEEK_END);
1002                                 size=ftell (pFile);
1003                                 fclose (pFile);
1004                         }
1005                         fastaFilePos.push_back(size);
1006                         
1007                         //get last file position of fastafile
1008                         FILE * qFile;
1009                         
1010                         //get num bytes in file
1011                         qFile = fopen (qfilename.c_str(),"rb");
1012                         if (qFile==NULL) perror ("Error opening file");
1013                         else{
1014                                 fseek (qFile, 0, SEEK_END);
1015                                 size=ftell (qFile);
1016                                 fclose (qFile);
1017                         }
1018                         qfileFilePos.push_back(size);
1019                 
1020                         return 1;
1021                 
1022                 #endif
1023         }
1024         catch(exception& e) {
1025                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1026                 exit(1);
1027         }
1028 }
1029
1030 //***************************************************************************************************************
1031
1032 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1033         try {
1034                 ifstream inOligos;
1035                 m->openInputFile(oligoFile, inOligos);
1036                 
1037                 ofstream test;
1038                 
1039                 string type, oligo, group;
1040
1041                 int indexPrimer = 0;
1042                 int indexBarcode = 0;
1043                 
1044                 while(!inOligos.eof()){
1045
1046                         inOligos >> type; 
1047                                         
1048                         if(type[0] == '#'){
1049                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1050                                 m->gobble(inOligos);
1051                         }
1052                         else{
1053                                 m->gobble(inOligos);
1054                                 //make type case insensitive
1055                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1056                                 
1057                                 inOligos >> oligo;
1058                                 
1059                                 for(int i=0;i<oligo.length();i++){
1060                                         oligo[i] = toupper(oligo[i]);
1061                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1062                                 }
1063                                 
1064                                 if(type == "FORWARD"){
1065                                         group = "";
1066                                         
1067                                         // get rest of line in case there is a primer name
1068                                         while (!inOligos.eof()) {       
1069                                                 char c = inOligos.get(); 
1070                                                 if (c == 10 || c == 13){        break;  }
1071                                                 else if (c == 32 || c == 9){;} //space or tab
1072                                                 else {  group += c;  }
1073                                         } 
1074                                         
1075                                         //check for repeat barcodes
1076                                         map<string, int>::iterator itPrime = primers.find(oligo);
1077                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1078                                         
1079                                         primers[oligo]=indexPrimer; indexPrimer++;              
1080                                         primerNameVector.push_back(group);
1081                                 }
1082                                 else if(type == "REVERSE"){
1083                                         Sequence oligoRC("reverse", oligo);
1084                                         oligoRC.reverseComplement();
1085                                         revPrimer.push_back(oligoRC.getUnaligned());
1086                                 }
1087                                 else if(type == "BARCODE"){
1088                                         inOligos >> group;
1089                                         
1090                                         //check for repeat barcodes
1091                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1092                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1093                                                 
1094                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1095                                         barcodeNameVector.push_back(group);
1096                                 }
1097                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1098                         }
1099                         m->gobble(inOligos);
1100                 }       
1101                 inOligos.close();
1102                 
1103                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1104                 
1105                 //add in potential combos
1106                 if(barcodeNameVector.size() == 0){
1107                         barcodes[""] = 0;
1108                         barcodeNameVector.push_back("");                        
1109                 }
1110                 
1111                 if(primerNameVector.size() == 0){
1112                         primers[""] = 0;
1113                         primerNameVector.push_back("");                 
1114                 }
1115                 
1116                 fastaFileNames.resize(barcodeNameVector.size());
1117                 for(int i=0;i<fastaFileNames.size();i++){
1118                         fastaFileNames[i].assign(primerNameVector.size(), "");
1119                 }
1120                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1121                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1122                 
1123                 if(allFiles){
1124                         set<string> uniqueNames; //used to cleanup outputFileNames
1125                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1126                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1127                                         
1128                                         string primerName = primerNameVector[itPrimer->second];
1129                                         string barcodeName = barcodeNameVector[itBar->second];
1130                                         
1131                                         string comboGroupName = "";
1132                                         string fastaFileName = "";
1133                                         string qualFileName = "";
1134                                         string nameFileName = "";
1135                                         
1136                                         if(primerName == ""){
1137                                                 comboGroupName = barcodeNameVector[itBar->second];
1138                                         }
1139                                         else{
1140                                                 if(barcodeName == ""){
1141                                                         comboGroupName = primerNameVector[itPrimer->second];
1142                                                 }
1143                                                 else{
1144                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1145                                                 }
1146                                         }
1147                                         
1148                                         
1149                                         ofstream temp;
1150                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1151                                         if (uniqueNames.count(fastaFileName) == 0) {
1152                                                 outputNames.push_back(fastaFileName);
1153                                                 outputTypes["fasta"].push_back(fastaFileName);
1154                                                 uniqueNames.insert(fastaFileName);
1155                                         }
1156                                         
1157                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1158                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1159                                         
1160                                         if(qFileName != ""){
1161                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1162                                                 if (uniqueNames.count(qualFileName) == 0) {
1163                                                         outputNames.push_back(qualFileName);
1164                                                         outputTypes["qfile"].push_back(qualFileName);
1165                                                 }
1166                                                 
1167                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1168                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1169                                         }
1170                                         
1171                                         if(nameFile != ""){
1172                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1173                                                 if (uniqueNames.count(nameFileName) == 0) {
1174                                                         outputNames.push_back(nameFileName);
1175                                                         outputTypes["name"].push_back(nameFileName);
1176                                                 }
1177                                                 
1178                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1179                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1180                                         }
1181                                         
1182                                 }
1183                         }
1184                 }
1185                 numFPrimers = primers.size();
1186                 numRPrimers = revPrimer.size();
1187
1188         }
1189         catch(exception& e) {
1190                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1191                 exit(1);
1192         }
1193 }
1194
1195 //***************************************************************************************************************
1196
1197 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1198         try {
1199                 
1200                 string rawSequence = seq.getUnaligned();
1201                 int success = bdiffs + 1;       //guilty until proven innocent
1202                 
1203                 //can you find the barcode
1204                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1205                         string oligo = it->first;
1206                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1207                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1208                                 break;  
1209                         }
1210                         
1211                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1212                                 group = it->second;
1213                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1214                                 
1215                                 if(qual.getName() != ""){
1216                                         qual.trimQScores(oligo.length(), -1);
1217                                 }
1218                                 
1219                                 success = 0;
1220                                 break;
1221                         }
1222                 }
1223                 
1224                 //if you found the barcode or if you don't want to allow for diffs
1225                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1226                 
1227                 else { //try aligning and see if you can find it
1228
1229                         int maxLength = 0;
1230
1231                         Alignment* alignment;
1232                         if (barcodes.size() > 0) {
1233                                 map<string,int>::iterator it=barcodes.begin();
1234
1235                                 for(it;it!=barcodes.end();it++){
1236                                         if(it->first.length() > maxLength){
1237                                                 maxLength = it->first.length();
1238                                         }
1239                                 }
1240                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1241
1242                         }else{ alignment = NULL; } 
1243                         
1244                         //can you find the barcode
1245                         int minDiff = 1e6;
1246                         int minCount = 1;
1247                         int minGroup = -1;
1248                         int minPos = 0;
1249                         
1250                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1251                                 string oligo = it->first;
1252 //                              int length = oligo.length();
1253                                 
1254                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1255                                         success = bdiffs + 10;
1256                                         break;
1257                                 }
1258                                 
1259                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1260                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1261                                 oligo = alignment->getSeqAAln();
1262                                 string temp = alignment->getSeqBAln();
1263                 
1264                                 int alnLength = oligo.length();
1265                                 
1266                                 for(int i=oligo.length()-1;i>=0;i--){
1267                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1268                                 }
1269                                 oligo = oligo.substr(0,alnLength);
1270                                 temp = temp.substr(0,alnLength);
1271                                 
1272                                 int numDiff = countDiffs(oligo, temp);
1273                                 
1274                                 if(numDiff < minDiff){
1275                                         minDiff = numDiff;
1276                                         minCount = 1;
1277                                         minGroup = it->second;
1278                                         minPos = 0;
1279                                         for(int i=0;i<alnLength;i++){
1280                                                 if(temp[i] != '-'){
1281                                                         minPos++;
1282                                                 }
1283                                         }
1284                                 }
1285                                 else if(numDiff == minDiff){
1286                                         minCount++;
1287                                 }
1288
1289                         }
1290
1291                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1292                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1293                         else{                                                                                                   //use the best match
1294                                 group = minGroup;
1295                                 seq.setUnaligned(rawSequence.substr(minPos));
1296                                 
1297                                 if(qual.getName() != ""){
1298                                         qual.trimQScores(minPos, -1);
1299                                 }
1300                                 success = minDiff;
1301                         }
1302                         
1303                         if (alignment != NULL) {  delete alignment;  }
1304                         
1305                 }
1306                 
1307                 return success;
1308                 
1309         }
1310         catch(exception& e) {
1311                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1312                 exit(1);
1313         }
1314
1315 }
1316
1317 //***************************************************************************************************************
1318
1319 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1320         try {
1321                 string rawSequence = seq.getUnaligned();
1322                 int success = pdiffs + 1;       //guilty until proven innocent
1323                 
1324                 //can you find the primer
1325                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1326                         string oligo = it->first;
1327                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1328                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1329                                 break;  
1330                         }
1331                         
1332                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1333                                 group = it->second;
1334                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1335                                 if(qual.getName() != ""){
1336                                         qual.trimQScores(oligo.length(), -1);
1337                                 }
1338                                 success = 0;
1339                                 break;
1340                         }
1341                 }
1342
1343                 //if you found the barcode or if you don't want to allow for diffs
1344                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1345                 
1346                 else { //try aligning and see if you can find it
1347
1348                         int maxLength = 0;
1349
1350                         Alignment* alignment;
1351                         if (primers.size() > 0) {
1352                                 map<string,int>::iterator it=primers.begin();
1353
1354                                 for(it;it!=primers.end();it++){
1355                                         if(it->first.length() > maxLength){
1356                                                 maxLength = it->first.length();
1357                                         }
1358                                 }
1359                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1360
1361                         }else{ alignment = NULL; } 
1362                         
1363                         //can you find the barcode
1364                         int minDiff = 1e6;
1365                         int minCount = 1;
1366                         int minGroup = -1;
1367                         int minPos = 0;
1368                         
1369                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1370                                 string oligo = it->first;
1371 //                              int length = oligo.length();
1372                                 
1373                                 if(rawSequence.length() < maxLength){   
1374                                         success = pdiffs + 100;
1375                                         break;
1376                                 }
1377                                 
1378                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1379                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1380                                 oligo = alignment->getSeqAAln();
1381                                 string temp = alignment->getSeqBAln();
1382                 
1383                                 int alnLength = oligo.length();
1384                                 
1385                                 for(int i=oligo.length()-1;i>=0;i--){
1386                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1387                                 }
1388                                 oligo = oligo.substr(0,alnLength);
1389                                 temp = temp.substr(0,alnLength);
1390                                 
1391                                 int numDiff = countDiffs(oligo, temp);
1392                                 
1393                                 if(numDiff < minDiff){
1394                                         minDiff = numDiff;
1395                                         minCount = 1;
1396                                         minGroup = it->second;
1397                                         minPos = 0;
1398                                         for(int i=0;i<alnLength;i++){
1399                                                 if(temp[i] != '-'){
1400                                                         minPos++;
1401                                                 }
1402                                         }
1403                                 }
1404                                 else if(numDiff == minDiff){
1405                                         minCount++;
1406                                 }
1407
1408                         }
1409
1410                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1411                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1412                         else{                                                                                                   //use the best match
1413                                 group = minGroup;
1414                                 seq.setUnaligned(rawSequence.substr(minPos));
1415                                 if(qual.getName() != ""){
1416                                         qual.trimQScores(minPos, -1);
1417                                 }
1418                                 success = minDiff;
1419                         }
1420                         
1421                         if (alignment != NULL) {  delete alignment;  }
1422                         
1423                 }
1424                 
1425                 return success;
1426
1427         }
1428         catch(exception& e) {
1429                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1430                 exit(1);
1431         }
1432 }
1433
1434 //***************************************************************************************************************
1435
1436 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1437         try {
1438                 string rawSequence = seq.getUnaligned();
1439                 bool success = 0;       //guilty until proven innocent
1440                 
1441                 for(int i=0;i<numRPrimers;i++){
1442                         string oligo = revPrimer[i];
1443                         
1444                         if(rawSequence.length() < oligo.length()){
1445                                 success = 0;
1446                                 break;
1447                         }
1448                         
1449                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1450                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1451                                 if(qual.getName() != ""){
1452                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1453                                 }
1454                                 success = 1;
1455                                 break;
1456                         }
1457                 }       
1458                 return success;
1459                 
1460         }
1461         catch(exception& e) {
1462                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1463                 exit(1);
1464         }
1465 }
1466
1467 //***************************************************************************************************************
1468
1469 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1470         try {
1471                 bool success = 1;
1472                 if(qscores.getName() != ""){
1473                         qscores.trimQScores(-1, keepFirst);
1474                 }
1475                 sequence.trim(keepFirst);
1476                 return success;
1477         }
1478         catch(exception& e) {
1479                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1480                 exit(1);
1481         }
1482         
1483 }       
1484
1485 //***************************************************************************************************************
1486
1487 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1488         try {
1489                 bool success = 0;
1490                 
1491                 int length = sequence.getNumBases() - removeLast;
1492                 
1493                 if(length > 0){
1494                         if(qscores.getName() != ""){
1495                                 qscores.trimQScores(-1, length);
1496                         }
1497                         sequence.trim(length);
1498                         success = 1;
1499                 }
1500                 else{
1501                         success = 0;
1502                 }
1503
1504                 return success;
1505         }
1506         catch(exception& e) {
1507                 m->errorOut(e, "removeLastTrim", "countDiffs");
1508                 exit(1);
1509         }
1510         
1511 }       
1512
1513 //***************************************************************************************************************
1514
1515 bool TrimSeqsCommand::cullLength(Sequence& seq){
1516         try {
1517         
1518                 int length = seq.getNumBases();
1519                 bool success = 0;       //guilty until proven innocent
1520                 
1521                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1522                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1523                 else                                                                                            {       success = 0;    }
1524                 
1525                 return success;
1526         
1527         }
1528         catch(exception& e) {
1529                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1530                 exit(1);
1531         }
1532         
1533 }
1534
1535 //***************************************************************************************************************
1536
1537 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1538         try {
1539                 int longHomoP = seq.getLongHomoPolymer();
1540                 bool success = 0;       //guilty until proven innocent
1541                 
1542                 if(longHomoP <= maxHomoP){      success = 1;    }
1543                 else                                    {       success = 0;    }
1544                 
1545                 return success;
1546         }
1547         catch(exception& e) {
1548                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1549                 exit(1);
1550         }
1551         
1552 }
1553
1554 //***************************************************************************************************************
1555
1556 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1557         try {
1558                 int numNs = seq.getAmbigBases();
1559                 bool success = 0;       //guilty until proven innocent
1560                 
1561                 if(numNs <= maxAmbig)   {       success = 1;    }
1562                 else                                    {       success = 0;    }
1563                 
1564                 return success;
1565         }
1566         catch(exception& e) {
1567                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1568                 exit(1);
1569         }
1570         
1571 }
1572
1573 //***************************************************************************************************************
1574
1575 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1576         try {
1577                 bool success = 1;
1578                 int length = oligo.length();
1579                 
1580                 for(int i=0;i<length;i++){
1581                         
1582                         if(oligo[i] != seq[i]){
1583                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1584                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1585                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1586                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1587                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1588                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1589                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1590                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1591                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1592                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1593                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1594                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1595                                 
1596                                 if(success == 0)        {       break;   }
1597                         }
1598                         else{
1599                                 success = 1;
1600                         }
1601                 }
1602                 
1603                 return success;
1604         }
1605         catch(exception& e) {
1606                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1607                 exit(1);
1608         }
1609
1610 }
1611
1612 //***************************************************************************************************************
1613
1614 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1615         try {
1616
1617                 int length = oligo.length();
1618                 int countDiffs = 0;
1619                 
1620                 for(int i=0;i<length;i++){
1621                                                                 
1622                         if(oligo[i] != seq[i]){
1623                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1624                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1625                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1626                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1627                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1628                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1629                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1630                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1631                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1632                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1633                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1634                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1635                         }
1636                         
1637                 }
1638                 
1639                 return countDiffs;
1640         }
1641         catch(exception& e) {
1642                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1643                 exit(1);
1644         }
1645
1646 }
1647
1648 //***************************************************************************************************************