]> git.donarmstrong.com Git - mothur.git/blob - trimseqscommand.cpp
finished adding the names option to trim.seqs
[mothur.git] / trimseqscommand.cpp
1 /*
2  *  trimseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/6/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "trimseqscommand.h"
11 #include "needlemanoverlap.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> TrimSeqsCommand::setParameters(){        
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
18                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
19                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
20                 CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
21                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
22                 CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxhomop);
23                 CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
24                 CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
25                 CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
26                 CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
27                 CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
28                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
29                 CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pallfiles);
30                 CommandParameter pqtrim("qtrim", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pqtrim);
31                 CommandParameter pqthreshold("qthreshold", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqthreshold);
32                 CommandParameter pqaverage("qaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqaverage);
33                 CommandParameter prollaverage("rollaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(prollaverage);
34                 CommandParameter pqwindowaverage("qwindowaverage", "Number", "", "0", "", "", "",false,false); parameters.push_back(pqwindowaverage);
35                 CommandParameter pqstepsize("qstepsize", "Number", "", "1", "", "", "",false,false); parameters.push_back(pqstepsize);
36                 CommandParameter pqwindowsize("qwindowsize", "Number", "", "50", "", "", "",false,false); parameters.push_back(pqwindowsize);
37                 CommandParameter pkeepfirst("keepfirst", "Number", "", "0", "", "", "",false,false); parameters.push_back(pkeepfirst);
38                 CommandParameter premovelast("removelast", "Number", "", "0", "", "", "",false,false); parameters.push_back(premovelast);
39                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
40                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
41                         
42                 vector<string> myArray;
43                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
44                 return myArray;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "TrimSeqsCommand", "setParameters");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string TrimSeqsCommand::getHelpString(){        
53         try {
54                 string helpString = "";
55                 helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
56                 helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
57                 helpString += "The trim.seqs command parameters are fasta, name, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
58                 helpString += "The fasta parameter is required.\n";
59                 helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
60                 helpString += "The oligos parameter allows you to provide an oligos file.\n";
61                 helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
62                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
63                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
64                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
65                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
66                 helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs.\n";
67                 helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
68                 helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
69                 helpString += "The qfile parameter allows you to provide a quality file.\n";
70                 helpString += "The qthreshold parameter allows you to set a minimum quality score allowed. \n";
71                 helpString += "The qaverage parameter allows you to set a minimum average quality score allowed. \n";
72                 helpString += "The qwindowsize parameter allows you to set a number of bases in a window. Default=50.\n";
73                 helpString += "The qwindowaverage parameter allows you to set a minimum average quality score allowed over a window. \n";
74                 helpString += "The rollaverage parameter allows you to set a minimum rolling average quality score allowed over a window. \n";
75                 helpString += "The qstepsize parameter allows you to set a number of bases to move the window over. Default=1.\n";
76                 helpString += "The allfiles parameter will create separate group and fasta file for each grouping. The default is F.\n";
77                 helpString += "The qtrim parameter will trim sequence from the point that they fall below the qthreshold and put it in the .trim file if set to true. The default is T.\n";
78                 helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
79                 helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
80                 helpString += "The trim.seqs command should be in the following format: \n";
81                 helpString += "trim.seqs(fasta=yourFastaFile, flip=yourFlip, oligos=yourOligos, maxambig=yourMaxambig,  \n";
82                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
83                 helpString += "Example trim.seqs(fasta=abrecovery.fasta, flip=..., oligos=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
84                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
85                 helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.seqs .\n";
86                 return helpString;
87         }
88         catch(exception& e) {
89                 m->errorOut(e, "TrimSeqsCommand", "getHelpString");
90                 exit(1);
91         }
92 }
93
94
95 //**********************************************************************************************************************
96
97 TrimSeqsCommand::TrimSeqsCommand(){     
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["qfile"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["name"] = tempOutNames;
106         }
107         catch(exception& e) {
108                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
109                 exit(1);
110         }
111 }
112 //***************************************************************************************************************
113
114 TrimSeqsCommand::TrimSeqsCommand(string option)  {
115         try {
116                 
117                 abort = false; calledHelp = false;   
118                 comboStarts = 0;
119                 
120                 //allow user to run help
121                 if(option == "help") { help(); abort = true; calledHelp = true; }
122                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
123                 
124                 else {
125                         vector<string> myArray = setParameters();
126                         
127                         OptionParser parser(option);
128                         map<string,string> parameters = parser.getParameters();
129                         
130                         ValidParameters validParameter;
131                         map<string,string>::iterator it;
132                         
133                         //check to make sure all parameters are valid for command
134                         for (it = parameters.begin(); it != parameters.end(); it++) { 
135                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
136                         }
137                         
138                         //initialize outputTypes
139                         vector<string> tempOutNames;
140                         outputTypes["fasta"] = tempOutNames;
141                         outputTypes["qfile"] = tempOutNames;
142                         outputTypes["group"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         
145                         //if the user changes the input directory command factory will send this info to us in the output parameter 
146                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
147                         if (inputDir == "not found"){   inputDir = "";          }
148                         else {
149                                 string path;
150                                 it = parameters.find("fasta");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
156                                 }
157                                 
158                                 it = parameters.find("oligos");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
164                                 }
165                                 
166                                 it = parameters.find("qfile");
167                                 //user has given a template file
168                                 if(it != parameters.end()){ 
169                                         path = m->hasPath(it->second);
170                                         //if the user has not given a path then, add inputdir. else leave path alone.
171                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
172                                 }
173                                 
174                                 it = parameters.find("name");
175                                 //user has given a template file
176                                 if(it != parameters.end()){ 
177                                         path = m->hasPath(it->second);
178                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
180                                 }
181                                 
182                         }
183
184                         
185                         //check for required parameters
186                         fastaFile = validParameter.validFile(parameters, "fasta", true);
187                         if (fastaFile == "not found") {                                 
188                                 fastaFile = m->getFastaFile(); 
189                                 if (fastaFile != "") { m->mothurOut("Using " + fastaFile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else if (fastaFile == "not open") { abort = true; }    
192                         
193                         //if the user changes the output directory command factory will send this info to us in the output parameter 
194                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
195                                 outputDir = ""; 
196                                 outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
197                         }
198                 
199                         
200                         //check for optional parameter and set defaults
201                         // ...at some point should added some additional type checking...
202                         string temp;
203                         temp = validParameter.validFile(parameters, "flip", false);
204                         if (temp == "not found"){       flip = 0;       }
205                         else if(m->isTrue(temp))        {       flip = 1;       }
206                 
207                         temp = validParameter.validFile(parameters, "oligos", true);
208                         if (temp == "not found"){       oligoFile = "";         }
209                         else if(temp == "not open"){    abort = true;   } 
210                         else                                    {       oligoFile = temp;               }
211                         
212                         
213                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
214                         convert(temp, maxAmbig);  
215
216                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "0"; }
217                         convert(temp, maxHomoP);  
218
219                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "0"; }
220                         convert(temp, minLength); 
221                         
222                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "0"; }
223                         convert(temp, maxLength);
224                         
225                         temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found") { temp = "0"; }
226                         convert(temp, bdiffs);
227                         
228                         temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
229                         convert(temp, pdiffs);
230                         
231                         temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
232                         convert(temp, tdiffs);
233                         
234                         if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
235                         
236                         temp = validParameter.validFile(parameters, "qfile", true);     
237                         if (temp == "not found")        {       qFileName = "";         }
238                         else if(temp == "not open")     {       abort = true;           }
239                         else                                            {       qFileName = temp;       }
240                         
241                         temp = validParameter.validFile(parameters, "name", true);      
242                         if (temp == "not found")        {       nameFile = "";          }
243                         else if(temp == "not open")     {       abort = true;           }
244                         else                                            {       nameFile = temp;        }
245                         
246                         temp = validParameter.validFile(parameters, "qthreshold", false);       if (temp == "not found") { temp = "0"; }
247                         convert(temp, qThreshold);
248                         
249                         temp = validParameter.validFile(parameters, "qtrim", false);            if (temp == "not found") { temp = "t"; }
250                         qtrim = m->isTrue(temp);
251
252                         temp = validParameter.validFile(parameters, "rollaverage", false);      if (temp == "not found") { temp = "0"; }
253                         convert(temp, qRollAverage);
254
255                         temp = validParameter.validFile(parameters, "qwindowaverage", false);if (temp == "not found") { temp = "0"; }
256                         convert(temp, qWindowAverage);
257
258                         temp = validParameter.validFile(parameters, "qwindowsize", false);      if (temp == "not found") { temp = "50"; }
259                         convert(temp, qWindowSize);
260
261                         temp = validParameter.validFile(parameters, "qstepsize", false);        if (temp == "not found") { temp = "1"; }
262                         convert(temp, qWindowStep);
263
264                         temp = validParameter.validFile(parameters, "qaverage", false);         if (temp == "not found") { temp = "0"; }
265                         convert(temp, qAverage);
266
267                         temp = validParameter.validFile(parameters, "keepfirst", false);        if (temp == "not found") { temp = "0"; }
268                         convert(temp, keepFirst);
269
270                         temp = validParameter.validFile(parameters, "removelast", false);       if (temp == "not found") { temp = "0"; }
271                         convert(temp, removeLast);
272                         
273                         temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "F"; }
274                         allFiles = m->isTrue(temp);
275                         
276                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
277                         m->setProcessors(temp);
278                         convert(temp, processors); 
279                         
280                         
281                         if(allFiles && (oligoFile == "")){
282                                 m->mothurOut("You selected allfiles, but didn't enter an oligos.  Ignoring the allfiles request."); m->mothurOutEndLine();
283                         }
284                         if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
285                                 m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
286                                 qAverage=0;
287                                 qThreshold=0;
288                         }
289                         if(!flip && oligoFile=="" && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP && qFileName == ""){               
290                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
291                                 abort = true;
292                         }
293                 }
294
295         }
296         catch(exception& e) {
297                 m->errorOut(e, "TrimSeqsCommand", "TrimSeqsCommand");
298                 exit(1);
299         }
300 }
301 //***************************************************************************************************************
302
303 int TrimSeqsCommand::execute(){
304         try{
305         
306                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
307                 
308                 numFPrimers = 0;  //this needs to be initialized
309                 numRPrimers = 0;
310                 vector<vector<string> > fastaFileNames;
311                 vector<vector<string> > qualFileNames;
312                 vector<vector<string> > nameFileNames;
313                 
314                 string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
315                 outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
316                 
317                 string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
318                 outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
319                 
320                 string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
321                 string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
322                 
323                 if (qFileName != "") {
324                         outputNames.push_back(trimQualFile);
325                         outputNames.push_back(scrapQualFile);
326                         outputTypes["qfile"].push_back(trimQualFile);
327                         outputTypes["qfile"].push_back(scrapQualFile); 
328                 }
329                 
330                 string trimNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "trim.names";
331                 string scrapNameFile = outputDir + m->getRootName(m->getSimpleName(nameFile)) + "scrap.names";
332                 
333                 if (nameFile != "") {
334                         m->readNames(nameFile, nameMap);
335                         outputNames.push_back(trimNameFile);
336                         outputNames.push_back(scrapNameFile);
337                         outputTypes["name"].push_back(trimNameFile);
338                         outputTypes["name"].push_back(scrapNameFile); 
339                 }
340                 
341                 if (m->control_pressed) { return 0; }
342                 
343                 string outputGroupFileName;
344                 if(oligoFile != ""){
345                         outputGroupFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
346                         outputNames.push_back(outputGroupFileName); outputTypes["group"].push_back(outputGroupFileName);
347                         getOligos(fastaFileNames, qualFileNames, nameFileNames);
348                 }
349                 
350                 vector<unsigned long int> fastaFilePos;
351                 vector<unsigned long int> qFilePos;
352                 
353                 setLines(fastaFile, qFileName, fastaFilePos, qFilePos);
354                 
355                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
356                         lines.push_back(new linePair(fastaFilePos[i], fastaFilePos[(i+1)]));
357                         if (qFileName != "") {  qLines.push_back(new linePair(qFilePos[i], qFilePos[(i+1)]));  }
358                 }       
359                 if(qFileName == "")     {       qLines = lines; } //files with duds
360                 
361                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
362                                 if(processors == 1){
363                                         driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
364                                 }else{
365                                         createProcessesCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames); 
366                                 }       
367                 #else
368                                 driverCreateTrim(fastaFile, qFileName, trimSeqFile, scrapSeqFile, trimQualFile, scrapQualFile, trimNameFile, scrapNameFile, outputGroupFileName, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
369                 #endif
370                 
371                 if (m->control_pressed) {  return 0; }                  
372         
373                 if(allFiles){
374                         map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
375                         map<string, string>::iterator it;
376                         set<string> namesToRemove;
377                         for(int i=0;i<fastaFileNames.size();i++){
378                                 for(int j=0;j<fastaFileNames[0].size();j++){
379                                         if (fastaFileNames[i][j] != "") {
380                                                 if (namesToRemove.count(fastaFileNames[i][j]) == 0) {
381                                                         if(m->isBlank(fastaFileNames[i][j])){
382                                                                 remove(fastaFileNames[i][j].c_str());
383                                                                 namesToRemove.insert(fastaFileNames[i][j]);
384                                                         
385                                                                 if(qFileName != ""){
386                                                                         remove(qualFileNames[i][j].c_str());
387                                                                         namesToRemove.insert(qualFileNames[i][j]);
388                                                                 }
389                                                                 
390                                                                 if(nameFile != ""){
391                                                                         remove(nameFileNames[i][j].c_str());
392                                                                         namesToRemove.insert(nameFileNames[i][j]);
393                                                                 }
394                                                         }else{  
395                                                                 it = uniqueFastaNames.find(fastaFileNames[i][j]);
396                                                                 if (it == uniqueFastaNames.end()) {     
397                                                                         uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
398                                                                 }       
399                                                         }
400                                                 }
401                                         }
402                                 }
403                         }
404                         
405                         //remove names for outputFileNames, just cleans up the output
406                         vector<string> outputNames2;
407                         for(int i = 0; i < outputNames.size(); i++) { if (namesToRemove.count(outputNames[i]) == 0) { outputNames2.push_back(outputNames[i]); } }
408                         outputNames = outputNames2;
409                         
410                         for (it = uniqueFastaNames.begin(); it != uniqueFastaNames.end(); it++) {
411                                 ifstream in;
412                                 m->openInputFile(it->first, in);
413                                 
414                                 ofstream out;
415                                 string thisGroupName = outputDir + m->getRootName(m->getSimpleName(it->first)) + "groups";
416                                 outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
417                                 m->openOutputFile(thisGroupName, out);
418                                 
419                                 while (!in.eof()){
420                                         if (m->control_pressed) { break; }
421                                         
422                                         Sequence currSeq(in); m->gobble(in);
423                                         out << currSeq.getName() << '\t' << it->second << endl;
424                                 }
425                                 in.close();
426                                 out.close();
427                         }
428                 }
429                 
430                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
431
432                 //output group counts
433                 m->mothurOutEndLine();
434                 int total = 0;
435                 if (groupCounts.size() != 0) {  m->mothurOut("Group count: \n");  }
436                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
437                          total += it->second; m->mothurOut(it->first + "\t" + toString(it->second)); m->mothurOutEndLine(); 
438                 }
439                 if (total != 0) { m->mothurOut("Total of all groups is " + toString(total)); m->mothurOutEndLine(); }
440                 
441                 if (m->control_pressed) {       for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } return 0;     }
442
443                 //set fasta file as new current fastafile
444                 string current = "";
445                 itTypes = outputTypes.find("fasta");
446                 if (itTypes != outputTypes.end()) {
447                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
448                 }
449                 
450                 itTypes = outputTypes.find("name");
451                 if (itTypes != outputTypes.end()) {
452                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
453                 }
454                 
455                 itTypes = outputTypes.find("qfile");
456                 if (itTypes != outputTypes.end()) {
457                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
458                 }
459                 
460                 itTypes = outputTypes.find("group");
461                 if (itTypes != outputTypes.end()) {
462                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
463                 }
464
465                 m->mothurOutEndLine();
466                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
467                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
468                 m->mothurOutEndLine();
469                 
470                 return 0;       
471                         
472         }
473         catch(exception& e) {
474                 m->errorOut(e, "TrimSeqsCommand", "execute");
475                 exit(1);
476         }
477 }
478                 
479 /**************************************************************************************/
480
481 int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string trimFileName, string scrapFileName, string trimQFileName, string scrapQFileName, string trimNFileName, string scrapNFileName, string groupFileName, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames, linePair* line, linePair* qline) {    
482                 
483         try {
484                 
485                 ofstream trimFASTAFile;
486                 m->openOutputFile(trimFileName, trimFASTAFile);
487                 
488                 ofstream scrapFASTAFile;
489                 m->openOutputFile(scrapFileName, scrapFASTAFile);
490                 
491                 ofstream trimQualFile;
492                 ofstream scrapQualFile;
493                 if(qFileName != ""){
494                         m->openOutputFile(trimQFileName, trimQualFile);
495                         m->openOutputFile(scrapQFileName, scrapQualFile);
496                 }
497                 
498                 ofstream trimNameFile;
499                 ofstream scrapNameFile;
500                 if(nameFile != ""){
501                         m->openOutputFile(trimNFileName, trimNameFile);
502                         m->openOutputFile(scrapNFileName, scrapNameFile);
503                 }
504                 
505                 
506                 ofstream outGroupsFile;
507                 if (oligoFile != ""){   m->openOutputFile(groupFileName, outGroupsFile);   }
508                 if(allFiles){
509                         for (int i = 0; i < fastaFileNames.size(); i++) { //clears old file
510                                 for (int j = 0; j < fastaFileNames[i].size(); j++) { //clears old file
511                                         if (fastaFileNames[i][j] != "") {
512                                                 ofstream temp;
513                                                 m->openOutputFile(fastaFileNames[i][j], temp);                  temp.close();
514                                                 if(qFileName != ""){
515                                                         m->openOutputFile(qualFileNames[i][j], temp);                   temp.close();
516                                                 }
517                                                 
518                                                 if(nameFile != ""){
519                                                         m->openOutputFile(nameFileNames[i][j], temp);                   temp.close();
520                                                 }
521                                         }
522                                 }
523                         }
524                 }
525                 
526                 ifstream inFASTA;
527                 m->openInputFile(filename, inFASTA);
528                 inFASTA.seekg(line->start);
529                 
530                 ifstream qFile;
531                 if(qFileName != "")     {
532                         m->openInputFile(qFileName, qFile);
533                         qFile.seekg(qline->start);  
534                 }
535                 
536                 int count = 0;
537                 bool moreSeqs = 1;
538         
539                 while (moreSeqs) {
540                                 
541                         if (m->control_pressed) { 
542                                 inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
543                                 if (oligoFile != "") {   outGroupsFile.close();   }
544
545                                 if(qFileName != ""){
546                                         qFile.close();
547                                 }
548                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
549
550                                 return 0;
551                         }
552                         
553                         int success = 1;
554                         string trashCode = "";
555                         int currentSeqsDiffs = 0;
556
557                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
558                         
559                         QualityScores currQual;
560                         if(qFileName != ""){
561                                 currQual = QualityScores(qFile);  m->gobble(qFile);
562                         }
563
564                         string origSeq = currSeq.getUnaligned();
565                         if (origSeq != "") {
566                                 
567                                 int barcodeIndex = 0;
568                                 int primerIndex = 0;
569                                 
570                                 if(barcodes.size() != 0){
571                                         success = stripBarcode(currSeq, currQual, barcodeIndex);
572                                         if(success > bdiffs)            {       trashCode += 'b';       }
573                                         else{ currentSeqsDiffs += success;  }
574                                 }
575                                 
576                                 if(numFPrimers != 0){
577                                         success = stripForward(currSeq, currQual, primerIndex);
578                                         if(success > pdiffs)            {       trashCode += 'f';       }
579                                         else{ currentSeqsDiffs += success;  }
580                                 }
581                                 
582                                 if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
583                                 
584                                 if(numRPrimers != 0){
585                                         success = stripReverse(currSeq, currQual);
586                                         if(!success)                            {       trashCode += 'r';       }
587                                 }
588
589                                 if(keepFirst != 0){
590                                         success = keepFirstTrim(currSeq, currQual);
591                                 }
592                                 
593                                 if(removeLast != 0){
594                                         success = removeLastTrim(currSeq, currQual);
595                                         if(!success)                            {       trashCode += 'l';       }
596                                 }
597
598                                 
599                                 if(qFileName != ""){
600                                         int origLength = currSeq.getNumBases();
601                                         
602                                         if(qThreshold != 0)                     {       success = currQual.stripQualThreshold(currSeq, qThreshold);                     }
603                                         else if(qAverage != 0)          {       success = currQual.cullQualAverage(currSeq, qAverage);                          }
604                                         else if(qRollAverage != 0)      {       success = currQual.stripQualRollingAverage(currSeq, qRollAverage);      }
605                                         else if(qWindowAverage != 0){   success = currQual.stripQualWindowAverage(currSeq, qWindowStep, qWindowSize, qWindowAverage);   }
606                                         else                                            {       success = 1;                            }
607                                 
608                                         //you don't want to trim, if it fails above then scrap it
609                                         if ((!qtrim) && (origLength != currSeq.getNumBases())) {  success = 0; }
610                                         
611                                         if(!success)                            {       trashCode += 'q';       }
612                                 }                               
613                 
614                                 if(minLength > 0 || maxLength > 0){
615                                         success = cullLength(currSeq);
616                                         if(!success)                            {       trashCode += 'l';       }
617                                 }
618                                 if(maxHomoP > 0){
619                                         success = cullHomoP(currSeq);
620                                         if(!success)                            {       trashCode += 'h';       }
621                                 }
622                                 if(maxAmbig != -1){
623                                         success = cullAmbigs(currSeq);
624                                         if(!success)                            {       trashCode += 'n';       }
625                                 }
626                                 
627                                 if(flip){               // should go last                       
628                                         currSeq.reverseComplement();
629                                         if(qFileName != ""){
630                                                 currQual.flipQScores(); 
631                                         }
632                                 }
633                                 
634                                 if(trashCode.length() == 0){
635                                         currSeq.setAligned(currSeq.getUnaligned());
636                                         currSeq.printSequence(trimFASTAFile);
637                                         
638                                         if(qFileName != ""){
639                                                 currQual.printQScores(trimQualFile);
640                                         }
641                                         
642                                         if(nameFile != ""){
643                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
644                                                 if (itName != nameMap.end()) {  trimNameFile << itName->first << '\t' << itName->second << endl; }
645                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
646                                         }
647                                         
648                                         if(barcodes.size() != 0){
649                                                 string thisGroup = barcodeNameVector[barcodeIndex];
650                                                 if (primers.size() != 0) { if (primerNameVector[primerIndex] != "") { thisGroup += "." + primerNameVector[primerIndex]; } }
651                                                 
652                                                 outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl;
653                                                 
654                                                 if (nameFile != "") {
655                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
656                                                         if (itName != nameMap.end()) { 
657                                                                 vector<string> thisSeqsNames; 
658                                                                 m->splitAtChar(itName->second, thisSeqsNames, ',');
659                                                                 for (int k = 1; k < thisSeqsNames.size(); k++) { //start at 1 to skip self
660                                                                         outGroupsFile << thisSeqsNames[k] << '\t' << thisGroup << endl;
661                                                                 }
662                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }                                                   
663                                                 }
664                                                 
665                                                 map<string, int>::iterator it = groupCounts.find(thisGroup);
666                                                 if (it == groupCounts.end()) {  groupCounts[thisGroup] = 1; }
667                                                 else { groupCounts[it->first]++; }
668                                                         
669                                         }
670                                         
671                                         
672                                         if(allFiles){
673                                                 ofstream output;
674                                                 m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
675                                                 currSeq.printSequence(output);
676                                                 output.close();
677                                                 
678                                                 if(qFileName != ""){
679                                                         m->openOutputFileAppend(qualFileNames[barcodeIndex][primerIndex], output);
680                                                         currQual.printQScores(output);
681                                                         output.close();                                                 
682                                                 }
683                                                 
684                                                 if(nameFile != ""){
685                                                         map<string, string>::iterator itName = nameMap.find(currSeq.getName());
686                                                         if (itName != nameMap.end()) { 
687                                                                 m->openOutputFileAppend(nameFileNames[barcodeIndex][primerIndex], output);
688                                                                 output << itName->first << '\t' << itName->second << endl; 
689                                                                 output.close();
690                                                         }else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
691                                                 }
692                                         }
693                                 }
694                                 else{
695                                         if(nameFile != ""){ //needs to be before the currSeq name is changed
696                                                 map<string, string>::iterator itName = nameMap.find(currSeq.getName());
697                                                 if (itName != nameMap.end()) {  scrapNameFile << itName->first << '\t' << itName->second << endl; }
698                                                 else { m->mothurOut("[ERROR]: " + currSeq.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); }
699                                         }
700                                         currSeq.setName(currSeq.getName() + '|' + trashCode);
701                                         currSeq.setUnaligned(origSeq);
702                                         currSeq.setAligned(origSeq);
703                                         currSeq.printSequence(scrapFASTAFile);
704                                         if(qFileName != ""){
705                                                 currQual.printQScores(scrapQualFile);
706                                         }
707                                 }
708                                 count++;
709                         }
710                         
711                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
712                                 unsigned long int pos = inFASTA.tellg();
713                                 if ((pos == -1) || (pos >= line->end)) { break; }
714                         #else
715                                 if (inFASTA.eof()) { break; }
716                         #endif
717                                 
718                         //report progress
719                         if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
720                         
721                 }
722                 //report progress
723                 if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
724
725                 
726                 inFASTA.close();
727                 trimFASTAFile.close();
728                 scrapFASTAFile.close();
729                 if (oligoFile != "") {   outGroupsFile.close();   }
730                 if(qFileName != "")     {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
731                 if(nameFile != "")      {       scrapNameFile.close(); trimNameFile.close();    }
732                 
733                 return count;
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "TrimSeqsCommand", "driverCreateTrim");
737                 exit(1);
738         }
739 }
740
741 /**************************************************************************************************/
742
743 int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName, string trimFASTAFileName, string scrapFASTAFileName, string trimQualFileName, string scrapQualFileName, string trimNameFileName, string scrapNameFileName, string groupFile, vector<vector<string> > fastaFileNames, vector<vector<string> > qualFileNames, vector<vector<string> > nameFileNames) {
744         try {
745 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
746                 int process = 1;
747                 int exitCommand = 1;
748                 processIDS.clear();
749                 
750                 //loop through and create all the processes you want
751                 while (process != processors) {
752                         int pid = fork();
753                         
754                         if (pid > 0) {
755                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
756                                 process++;
757                         }else if (pid == 0){
758                                 
759                                 vector<vector<string> > tempFASTAFileNames = fastaFileNames;
760                                 vector<vector<string> > tempPrimerQualFileNames = qualFileNames;
761                                 vector<vector<string> > tempNameFileNames = nameFileNames;
762
763                                 if(allFiles){
764                                         ofstream temp;
765
766                                         for(int i=0;i<tempFASTAFileNames.size();i++){
767                                                 for(int j=0;j<tempFASTAFileNames[i].size();j++){
768                                                         if (tempFASTAFileNames[i][j] != "") {
769                                                                 tempFASTAFileNames[i][j] += toString(getpid()) + ".temp";
770                                                                 m->openOutputFile(tempFASTAFileNames[i][j], temp);                      temp.close();
771
772                                                                 if(qFileName != ""){
773                                                                         tempPrimerQualFileNames[i][j] += toString(getpid()) + ".temp";
774                                                                         m->openOutputFile(tempPrimerQualFileNames[i][j], temp);         temp.close();
775                                                                 }
776                                                                 if(nameFile != ""){
777                                                                         tempNameFileNames[i][j] += toString(getpid()) + ".temp";
778                                                                         m->openOutputFile(tempNameFileNames[i][j], temp);               temp.close();
779                                                                 }
780                                                         }
781                                                 }
782                                         }
783                                 }
784                                                         
785                                 driverCreateTrim(filename,
786                                                                  qFileName,
787                                                                  (trimFASTAFileName + toString(getpid()) + ".temp"),
788                                                                  (scrapFASTAFileName + toString(getpid()) + ".temp"),
789                                                                  (trimQualFileName + toString(getpid()) + ".temp"),
790                                                                  (scrapQualFileName + toString(getpid()) + ".temp"),
791                                                                  (trimNameFileName + toString(getpid()) + ".temp"),
792                                                                  (scrapNameFileName + toString(getpid()) + ".temp"),
793                                                                  (groupFile + toString(getpid()) + ".temp"),
794                                                                  tempFASTAFileNames,
795                                                                  tempPrimerQualFileNames,
796                                                                  tempNameFileNames,
797                                                                  lines[process],
798                                                                  qLines[process]);
799                                 
800                                 //pass groupCounts to parent
801                                 ofstream out;
802                                 string tempFile = filename + toString(getpid()) + ".num.temp";
803                                 m->openOutputFile(tempFile, out);
804                                 for (map<string, int>::iterator it = groupCounts.begin(); it != groupCounts.end(); it++) {
805                                         out << it->first << '\t' << it->second << endl;
806                                 }
807                                 out.close();
808                                 
809                                 exit(0);
810                         }else { 
811                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
812                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
813                                 exit(0);
814                         }
815                 }
816                 
817                 //parent do my part
818                 ofstream temp;
819                 m->openOutputFile(trimFASTAFileName, temp);             temp.close();
820                 m->openOutputFile(scrapFASTAFileName, temp);    temp.close();
821                 m->openOutputFile(trimQualFileName, temp);              temp.close();
822                 m->openOutputFile(scrapQualFileName, temp);             temp.close();
823                 m->openOutputFile(trimNameFileName, temp);              temp.close();
824                 m->openOutputFile(scrapNameFileName, temp);             temp.close();
825
826                 driverCreateTrim(filename, qFileName, trimFASTAFileName, scrapFASTAFileName, trimQualFileName, scrapQualFileName, trimNameFileName, scrapNameFileName, groupFile, fastaFileNames, qualFileNames, nameFileNames, lines[0], qLines[0]);
827                 
828                 //force parent to wait until all the processes are done
829                 for (int i=0;i<processIDS.size();i++) { 
830                         int temp = processIDS[i];
831                         wait(&temp);
832                 }
833                 
834                 //append files
835                 for(int i=0;i<processIDS.size();i++){
836                         
837                         m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
838                         
839                         m->appendFiles((trimFASTAFileName + toString(processIDS[i]) + ".temp"), trimFASTAFileName);
840                         remove((trimFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
841                         m->appendFiles((scrapFASTAFileName + toString(processIDS[i]) + ".temp"), scrapFASTAFileName);
842                         remove((scrapFASTAFileName + toString(processIDS[i]) + ".temp").c_str());
843                         
844                         if(qFileName != ""){
845                                 m->appendFiles((trimQualFileName + toString(processIDS[i]) + ".temp"), trimQualFileName);
846                                 remove((trimQualFileName + toString(processIDS[i]) + ".temp").c_str());
847                                 m->appendFiles((scrapQualFileName + toString(processIDS[i]) + ".temp"), scrapQualFileName);
848                                 remove((scrapQualFileName + toString(processIDS[i]) + ".temp").c_str());
849                         }
850                         
851                         if(nameFile != ""){
852                                 m->appendFiles((trimNameFileName + toString(processIDS[i]) + ".temp"), trimNameFileName);
853                                 remove((trimNameFileName + toString(processIDS[i]) + ".temp").c_str());
854                                 m->appendFiles((scrapNameFileName + toString(processIDS[i]) + ".temp"), scrapNameFileName);
855                                 remove((scrapNameFileName + toString(processIDS[i]) + ".temp").c_str());
856                         }
857                         
858                         m->appendFiles((groupFile + toString(processIDS[i]) + ".temp"), groupFile);
859                         remove((groupFile + toString(processIDS[i]) + ".temp").c_str());
860                         
861                         
862                         if(allFiles){
863                                 for(int j=0;j<fastaFileNames.size();j++){
864                                         for(int k=0;k<fastaFileNames[j].size();k++){
865                                                 if (fastaFileNames[j][k] != "") {
866                                                         m->appendFiles((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp"), fastaFileNames[j][k]);
867                                                         remove((fastaFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
868                                                         
869                                                         if(qFileName != ""){
870                                                                 m->appendFiles((qualFileNames[j][k] + toString(processIDS[i]) + ".temp"), qualFileNames[j][k]);
871                                                                 remove((qualFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
872                                                         }
873                                                         
874                                                         if(nameFile != ""){
875                                                                 m->appendFiles((nameFileNames[j][k] + toString(processIDS[i]) + ".temp"), nameFileNames[j][k]);
876                                                                 remove((nameFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
877                                                         }
878                                                 }
879                                         }
880                                 }
881                         }
882                         
883                         ifstream in;
884                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
885                         m->openInputFile(tempFile, in);
886                         int tempNum;
887                         string group;
888                         while (!in.eof()) { 
889                                 in >> group >> tempNum; m->gobble(in);
890                                 
891                                 map<string, int>::iterator it = groupCounts.find(group);
892                                 if (it == groupCounts.end()) {  groupCounts[group] = tempNum; }
893                                 else { groupCounts[it->first] += tempNum; }
894                         }
895                         in.close(); remove(tempFile.c_str());
896                         
897                 }
898         
899                 return exitCommand;
900 #endif          
901         }
902         catch(exception& e) {
903                 m->errorOut(e, "TrimSeqsCommand", "createProcessesCreateTrim");
904                 exit(1);
905         }
906 }
907
908 /**************************************************************************************************/
909
910 int TrimSeqsCommand::setLines(string filename, string qfilename, vector<unsigned long int>& fastaFilePos, vector<unsigned long int>& qfileFilePos) {
911         try {
912                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
913                 //set file positions for fasta file
914                 fastaFilePos = m->divideFile(filename, processors);
915                 
916                 if (qfilename == "") { return processors; }
917                 
918                 //get name of first sequence in each chunk
919                 map<string, int> firstSeqNames;
920                 for (int i = 0; i < (fastaFilePos.size()-1); i++) {
921                         ifstream in;
922                         m->openInputFile(filename, in);
923                         in.seekg(fastaFilePos[i]);
924                 
925                         Sequence temp(in); 
926                         firstSeqNames[temp.getName()] = i;
927                 
928                         in.close();
929                 }
930                                 
931                 //seach for filePos of each first name in the qfile and save in qfileFilePos
932                 ifstream inQual;
933                 m->openInputFile(qfilename, inQual);
934                 
935                 string input;
936                 while(!inQual.eof()){   
937                         input = m->getline(inQual);
938
939                         if (input.length() != 0) {
940                                 if(input[0] == '>'){ //this is a sequence name line
941                                         istringstream nameStream(input);
942                                         
943                                         string sname = "";  nameStream >> sname;
944                                         sname = sname.substr(1);
945                                         
946                                         map<string, int>::iterator it = firstSeqNames.find(sname);
947                                         
948                                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
949                                                 unsigned long int pos = inQual.tellg(); 
950                                                 qfileFilePos.push_back(pos - input.length() - 1);       
951                                                 firstSeqNames.erase(it);
952                                         }
953                                 }
954                         }
955                         
956                         if (firstSeqNames.size() == 0) { break; }
957                 }
958                 inQual.close();
959                 
960                 
961                 if (firstSeqNames.size() != 0) { 
962                         for (map<string, int>::iterator it = firstSeqNames.begin(); it != firstSeqNames.end(); it++) {
963                                 m->mothurOut(it->first + " is in your fasta file and not in your quality file, not using quality file."); m->mothurOutEndLine();
964                         }
965                         qFileName = "";
966                         return processors;
967                 }
968
969                 //get last file position of qfile
970                 FILE * pFile;
971                 unsigned long int size;
972                 
973                 //get num bytes in file
974                 pFile = fopen (qfilename.c_str(),"rb");
975                 if (pFile==NULL) perror ("Error opening file");
976                 else{
977                         fseek (pFile, 0, SEEK_END);
978                         size=ftell (pFile);
979                         fclose (pFile);
980                 }
981                 
982                 qfileFilePos.push_back(size);
983                 
984                 return processors;
985                 
986                 #else
987                 
988                         fastaFilePos.push_back(0); qfileFilePos.push_back(0);
989                         //get last file position of fastafile
990                         FILE * pFile;
991                         unsigned long int size;
992                         
993                         //get num bytes in file
994                         pFile = fopen (filename.c_str(),"rb");
995                         if (pFile==NULL) perror ("Error opening file");
996                         else{
997                                 fseek (pFile, 0, SEEK_END);
998                                 size=ftell (pFile);
999                                 fclose (pFile);
1000                         }
1001                         fastaFilePos.push_back(size);
1002                         
1003                         //get last file position of fastafile
1004                         FILE * qFile;
1005                         
1006                         //get num bytes in file
1007                         qFile = fopen (qfilename.c_str(),"rb");
1008                         if (qFile==NULL) perror ("Error opening file");
1009                         else{
1010                                 fseek (qFile, 0, SEEK_END);
1011                                 size=ftell (qFile);
1012                                 fclose (qFile);
1013                         }
1014                         qfileFilePos.push_back(size);
1015                 
1016                         return 1;
1017                 
1018                 #endif
1019         }
1020         catch(exception& e) {
1021                 m->errorOut(e, "TrimSeqsCommand", "setLines");
1022                 exit(1);
1023         }
1024 }
1025
1026 //***************************************************************************************************************
1027
1028 void TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
1029         try {
1030                 ifstream inOligos;
1031                 m->openInputFile(oligoFile, inOligos);
1032                 
1033                 ofstream test;
1034                 
1035                 string type, oligo, group;
1036
1037                 int indexPrimer = 0;
1038                 int indexBarcode = 0;
1039                 
1040                 while(!inOligos.eof()){
1041
1042                         inOligos >> type; 
1043                                         
1044                         if(type[0] == '#'){
1045                                 while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
1046                                 m->gobble(inOligos);
1047                         }
1048                         else{
1049                                 m->gobble(inOligos);
1050                                 //make type case insensitive
1051                                 for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
1052                                 
1053                                 inOligos >> oligo;
1054                                 
1055                                 for(int i=0;i<oligo.length();i++){
1056                                         oligo[i] = toupper(oligo[i]);
1057                                         if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
1058                                 }
1059                                 
1060                                 if(type == "FORWARD"){
1061                                         group = "";
1062                                         
1063                                         // get rest of line in case there is a primer name
1064                                         while (!inOligos.eof()) {       
1065                                                 char c = inOligos.get(); 
1066                                                 if (c == 10 || c == 13){        break;  }
1067                                                 else if (c == 32 || c == 9){;} //space or tab
1068                                                 else {  group += c;  }
1069                                         } 
1070                                         
1071                                         //check for repeat barcodes
1072                                         map<string, int>::iterator itPrime = primers.find(oligo);
1073                                         if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1074                                         
1075                                         primers[oligo]=indexPrimer; indexPrimer++;              
1076                                         primerNameVector.push_back(group);
1077                                 }
1078                                 else if(type == "REVERSE"){
1079                                         Sequence oligoRC("reverse", oligo);
1080                                         oligoRC.reverseComplement();
1081                                         revPrimer.push_back(oligoRC.getUnaligned());
1082                                 }
1083                                 else if(type == "BARCODE"){
1084                                         inOligos >> group;
1085                                         
1086                                         //check for repeat barcodes
1087                                         map<string, int>::iterator itBar = barcodes.find(oligo);
1088                                         if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
1089                                                 
1090                                         barcodes[oligo]=indexBarcode; indexBarcode++;
1091                                         barcodeNameVector.push_back(group);
1092                                 }
1093                                 else{   m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
1094                         }
1095                         m->gobble(inOligos);
1096                 }       
1097                 inOligos.close();
1098                 
1099                 if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
1100                 
1101                 //add in potential combos
1102                 if(barcodeNameVector.size() == 0){
1103                         barcodes[""] = 0;
1104                         barcodeNameVector.push_back("");                        
1105                 }
1106                 
1107                 if(primerNameVector.size() == 0){
1108                         primers[""] = 0;
1109                         primerNameVector.push_back("");                 
1110                 }
1111                 
1112                 fastaFileNames.resize(barcodeNameVector.size());
1113                 for(int i=0;i<fastaFileNames.size();i++){
1114                         fastaFileNames[i].assign(primerNameVector.size(), "");
1115                 }
1116                 if(qFileName != "")     {       qualFileNames = fastaFileNames; }
1117                 if(nameFile != "")      {       nameFileNames = fastaFileNames; }
1118                 
1119                 if(allFiles){
1120                         set<string> uniqueNames; //used to cleanup outputFileNames
1121                         for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
1122                                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
1123                                         
1124                                         string primerName = primerNameVector[itPrimer->second];
1125                                         string barcodeName = barcodeNameVector[itBar->second];
1126                                         
1127                                         string comboGroupName = "";
1128                                         string fastaFileName = "";
1129                                         string qualFileName = "";
1130                                         string nameFileName = "";
1131                                         
1132                                         if(primerName == ""){
1133                                                 comboGroupName = barcodeNameVector[itBar->second];
1134                                         }
1135                                         else{
1136                                                 if(barcodeName == ""){
1137                                                         comboGroupName = primerNameVector[itPrimer->second];
1138                                                 }
1139                                                 else{
1140                                                         comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
1141                                                 }
1142                                         }
1143                                         
1144                                         
1145                                         ofstream temp;
1146                                         fastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + comboGroupName + ".fasta";
1147                                         if (uniqueNames.count(fastaFileName) == 0) {
1148                                                 outputNames.push_back(fastaFileName);
1149                                                 outputTypes["fasta"].push_back(fastaFileName);
1150                                                 uniqueNames.insert(fastaFileName);
1151                                         }
1152                                         
1153                                         fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
1154                                         m->openOutputFile(fastaFileName, temp);         temp.close();
1155                                         
1156                                         if(qFileName != ""){
1157                                                 qualFileName = outputDir + m->getRootName(m->getSimpleName(qFileName)) + comboGroupName + ".qual";
1158                                                 if (uniqueNames.count(qualFileName) == 0) {
1159                                                         outputNames.push_back(qualFileName);
1160                                                         outputTypes["qfile"].push_back(qualFileName);
1161                                                 }
1162                                                 
1163                                                 qualFileNames[itBar->second][itPrimer->second] = qualFileName;
1164                                                 m->openOutputFile(qualFileName, temp);          temp.close();
1165                                         }
1166                                         
1167                                         if(nameFile != ""){
1168                                                 nameFileName = outputDir + m->getRootName(m->getSimpleName(nameFile)) + comboGroupName + ".names";
1169                                                 if (uniqueNames.count(nameFileName) == 0) {
1170                                                         outputNames.push_back(nameFileName);
1171                                                         outputTypes["name"].push_back(nameFileName);
1172                                                 }
1173                                                 
1174                                                 nameFileNames[itBar->second][itPrimer->second] = nameFileName;
1175                                                 m->openOutputFile(nameFileName, temp);          temp.close();
1176                                         }
1177                                         
1178                                 }
1179                         }
1180                 }
1181                 numFPrimers = primers.size();
1182                 numRPrimers = revPrimer.size();
1183
1184         }
1185         catch(exception& e) {
1186                 m->errorOut(e, "TrimSeqsCommand", "getOligos");
1187                 exit(1);
1188         }
1189 }
1190
1191 //***************************************************************************************************************
1192
1193 int TrimSeqsCommand::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
1194         try {
1195                 
1196                 string rawSequence = seq.getUnaligned();
1197                 int success = bdiffs + 1;       //guilty until proven innocent
1198                 
1199                 //can you find the barcode
1200                 for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1201                         string oligo = it->first;
1202                         if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
1203                                 success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1204                                 break;  
1205                         }
1206                         
1207                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1208                                 group = it->second;
1209                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1210                                 
1211                                 if(qual.getName() != ""){
1212                                         qual.trimQScores(oligo.length(), -1);
1213                                 }
1214                                 
1215                                 success = 0;
1216                                 break;
1217                         }
1218                 }
1219                 
1220                 //if you found the barcode or if you don't want to allow for diffs
1221                 if ((bdiffs == 0) || (success == 0)) { return success;  }
1222                 
1223                 else { //try aligning and see if you can find it
1224
1225                         int maxLength = 0;
1226
1227                         Alignment* alignment;
1228                         if (barcodes.size() > 0) {
1229                                 map<string,int>::iterator it=barcodes.begin();
1230
1231                                 for(it;it!=barcodes.end();it++){
1232                                         if(it->first.length() > maxLength){
1233                                                 maxLength = it->first.length();
1234                                         }
1235                                 }
1236                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
1237
1238                         }else{ alignment = NULL; } 
1239                         
1240                         //can you find the barcode
1241                         int minDiff = 1e6;
1242                         int minCount = 1;
1243                         int minGroup = -1;
1244                         int minPos = 0;
1245                         
1246                         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
1247                                 string oligo = it->first;
1248 //                              int length = oligo.length();
1249                                 
1250                                 if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
1251                                         success = bdiffs + 10;
1252                                         break;
1253                                 }
1254                                 
1255                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1256                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
1257                                 oligo = alignment->getSeqAAln();
1258                                 string temp = alignment->getSeqBAln();
1259                 
1260                                 int alnLength = oligo.length();
1261                                 
1262                                 for(int i=oligo.length()-1;i>=0;i--){
1263                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1264                                 }
1265                                 oligo = oligo.substr(0,alnLength);
1266                                 temp = temp.substr(0,alnLength);
1267                                 
1268                                 int numDiff = countDiffs(oligo, temp);
1269                                 
1270                                 if(numDiff < minDiff){
1271                                         minDiff = numDiff;
1272                                         minCount = 1;
1273                                         minGroup = it->second;
1274                                         minPos = 0;
1275                                         for(int i=0;i<alnLength;i++){
1276                                                 if(temp[i] != '-'){
1277                                                         minPos++;
1278                                                 }
1279                                         }
1280                                 }
1281                                 else if(numDiff == minDiff){
1282                                         minCount++;
1283                                 }
1284
1285                         }
1286
1287                         if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
1288                         else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
1289                         else{                                                                                                   //use the best match
1290                                 group = minGroup;
1291                                 seq.setUnaligned(rawSequence.substr(minPos));
1292                                 
1293                                 if(qual.getName() != ""){
1294                                         qual.trimQScores(minPos, -1);
1295                                 }
1296                                 success = minDiff;
1297                         }
1298                         
1299                         if (alignment != NULL) {  delete alignment;  }
1300                         
1301                 }
1302                 
1303                 return success;
1304                 
1305         }
1306         catch(exception& e) {
1307                 m->errorOut(e, "TrimSeqsCommand", "stripBarcode");
1308                 exit(1);
1309         }
1310
1311 }
1312
1313 //***************************************************************************************************************
1314
1315 int TrimSeqsCommand::stripForward(Sequence& seq, QualityScores& qual, int& group){
1316         try {
1317                 string rawSequence = seq.getUnaligned();
1318                 int success = pdiffs + 1;       //guilty until proven innocent
1319                 
1320                 //can you find the primer
1321                 for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1322                         string oligo = it->first;
1323                         if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
1324                                 success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
1325                                 break;  
1326                         }
1327                         
1328                         if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
1329                                 group = it->second;
1330                                 seq.setUnaligned(rawSequence.substr(oligo.length()));
1331                                 if(qual.getName() != ""){
1332                                         qual.trimQScores(oligo.length(), -1);
1333                                 }
1334                                 success = 0;
1335                                 break;
1336                         }
1337                 }
1338
1339                 //if you found the barcode or if you don't want to allow for diffs
1340                 if ((pdiffs == 0) || (success == 0)) { return success;  }
1341                 
1342                 else { //try aligning and see if you can find it
1343
1344                         int maxLength = 0;
1345
1346                         Alignment* alignment;
1347                         if (primers.size() > 0) {
1348                                 map<string,int>::iterator it=primers.begin();
1349
1350                                 for(it;it!=primers.end();it++){
1351                                         if(it->first.length() > maxLength){
1352                                                 maxLength = it->first.length();
1353                                         }
1354                                 }
1355                                 alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
1356
1357                         }else{ alignment = NULL; } 
1358                         
1359                         //can you find the barcode
1360                         int minDiff = 1e6;
1361                         int minCount = 1;
1362                         int minGroup = -1;
1363                         int minPos = 0;
1364                         
1365                         for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
1366                                 string oligo = it->first;
1367 //                              int length = oligo.length();
1368                                 
1369                                 if(rawSequence.length() < maxLength){   
1370                                         success = pdiffs + 100;
1371                                         break;
1372                                 }
1373                                 
1374                                 //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
1375                                 alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
1376                                 oligo = alignment->getSeqAAln();
1377                                 string temp = alignment->getSeqBAln();
1378                 
1379                                 int alnLength = oligo.length();
1380                                 
1381                                 for(int i=oligo.length()-1;i>=0;i--){
1382                                         if(oligo[i] != '-'){    alnLength = i+1;        break;  }
1383                                 }
1384                                 oligo = oligo.substr(0,alnLength);
1385                                 temp = temp.substr(0,alnLength);
1386                                 
1387                                 int numDiff = countDiffs(oligo, temp);
1388                                 
1389                                 if(numDiff < minDiff){
1390                                         minDiff = numDiff;
1391                                         minCount = 1;
1392                                         minGroup = it->second;
1393                                         minPos = 0;
1394                                         for(int i=0;i<alnLength;i++){
1395                                                 if(temp[i] != '-'){
1396                                                         minPos++;
1397                                                 }
1398                                         }
1399                                 }
1400                                 else if(numDiff == minDiff){
1401                                         minCount++;
1402                                 }
1403
1404                         }
1405
1406                         if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
1407                         else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
1408                         else{                                                                                                   //use the best match
1409                                 group = minGroup;
1410                                 seq.setUnaligned(rawSequence.substr(minPos));
1411                                 if(qual.getName() != ""){
1412                                         qual.trimQScores(minPos, -1);
1413                                 }
1414                                 success = minDiff;
1415                         }
1416                         
1417                         if (alignment != NULL) {  delete alignment;  }
1418                         
1419                 }
1420                 
1421                 return success;
1422
1423         }
1424         catch(exception& e) {
1425                 m->errorOut(e, "TrimSeqsCommand", "stripForward");
1426                 exit(1);
1427         }
1428 }
1429
1430 //***************************************************************************************************************
1431
1432 bool TrimSeqsCommand::stripReverse(Sequence& seq, QualityScores& qual){
1433         try {
1434                 string rawSequence = seq.getUnaligned();
1435                 bool success = 0;       //guilty until proven innocent
1436                 
1437                 for(int i=0;i<numRPrimers;i++){
1438                         string oligo = revPrimer[i];
1439                         
1440                         if(rawSequence.length() < oligo.length()){
1441                                 success = 0;
1442                                 break;
1443                         }
1444                         
1445                         if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
1446                                 seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
1447                                 if(qual.getName() != ""){
1448                                         qual.trimQScores(-1, rawSequence.length()-oligo.length());
1449                                 }
1450                                 success = 1;
1451                                 break;
1452                         }
1453                 }       
1454                 return success;
1455                 
1456         }
1457         catch(exception& e) {
1458                 m->errorOut(e, "TrimSeqsCommand", "stripReverse");
1459                 exit(1);
1460         }
1461 }
1462
1463 //***************************************************************************************************************
1464
1465 bool TrimSeqsCommand::keepFirstTrim(Sequence& sequence, QualityScores& qscores){
1466         try {
1467                 bool success = 1;
1468                 if(qscores.getName() != ""){
1469                         qscores.trimQScores(-1, keepFirst);
1470                 }
1471                 sequence.trim(keepFirst);
1472                 return success;
1473         }
1474         catch(exception& e) {
1475                 m->errorOut(e, "keepFirstTrim", "countDiffs");
1476                 exit(1);
1477         }
1478         
1479 }       
1480
1481 //***************************************************************************************************************
1482
1483 bool TrimSeqsCommand::removeLastTrim(Sequence& sequence, QualityScores& qscores){
1484         try {
1485                 bool success = 0;
1486                 
1487                 int length = sequence.getNumBases() - removeLast;
1488                 
1489                 if(length > 0){
1490                         if(qscores.getName() != ""){
1491                                 qscores.trimQScores(-1, length);
1492                         }
1493                         sequence.trim(length);
1494                         success = 1;
1495                 }
1496                 else{
1497                         success = 0;
1498                 }
1499
1500                 return success;
1501         }
1502         catch(exception& e) {
1503                 m->errorOut(e, "removeLastTrim", "countDiffs");
1504                 exit(1);
1505         }
1506         
1507 }       
1508
1509 //***************************************************************************************************************
1510
1511 bool TrimSeqsCommand::cullLength(Sequence& seq){
1512         try {
1513         
1514                 int length = seq.getNumBases();
1515                 bool success = 0;       //guilty until proven innocent
1516                 
1517                 if(length >= minLength && maxLength == 0)                       {       success = 1;    }
1518                 else if(length >= minLength && length <= maxLength)     {       success = 1;    }
1519                 else                                                                                            {       success = 0;    }
1520                 
1521                 return success;
1522         
1523         }
1524         catch(exception& e) {
1525                 m->errorOut(e, "TrimSeqsCommand", "cullLength");
1526                 exit(1);
1527         }
1528         
1529 }
1530
1531 //***************************************************************************************************************
1532
1533 bool TrimSeqsCommand::cullHomoP(Sequence& seq){
1534         try {
1535                 int longHomoP = seq.getLongHomoPolymer();
1536                 bool success = 0;       //guilty until proven innocent
1537                 
1538                 if(longHomoP <= maxHomoP){      success = 1;    }
1539                 else                                    {       success = 0;    }
1540                 
1541                 return success;
1542         }
1543         catch(exception& e) {
1544                 m->errorOut(e, "TrimSeqsCommand", "cullHomoP");
1545                 exit(1);
1546         }
1547         
1548 }
1549
1550 //***************************************************************************************************************
1551
1552 bool TrimSeqsCommand::cullAmbigs(Sequence& seq){
1553         try {
1554                 int numNs = seq.getAmbigBases();
1555                 bool success = 0;       //guilty until proven innocent
1556                 
1557                 if(numNs <= maxAmbig)   {       success = 1;    }
1558                 else                                    {       success = 0;    }
1559                 
1560                 return success;
1561         }
1562         catch(exception& e) {
1563                 m->errorOut(e, "TrimSeqsCommand", "cullAmbigs");
1564                 exit(1);
1565         }
1566         
1567 }
1568
1569 //***************************************************************************************************************
1570
1571 bool TrimSeqsCommand::compareDNASeq(string oligo, string seq){
1572         try {
1573                 bool success = 1;
1574                 int length = oligo.length();
1575                 
1576                 for(int i=0;i<length;i++){
1577                         
1578                         if(oligo[i] != seq[i]){
1579                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
1580                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
1581                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
1582                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
1583                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
1584                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
1585                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
1586                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
1587                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1588                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
1589                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
1590                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
1591                                 
1592                                 if(success == 0)        {       break;   }
1593                         }
1594                         else{
1595                                 success = 1;
1596                         }
1597                 }
1598                 
1599                 return success;
1600         }
1601         catch(exception& e) {
1602                 m->errorOut(e, "TrimSeqsCommand", "compareDNASeq");
1603                 exit(1);
1604         }
1605
1606 }
1607
1608 //***************************************************************************************************************
1609
1610 int TrimSeqsCommand::countDiffs(string oligo, string seq){
1611         try {
1612
1613                 int length = oligo.length();
1614                 int countDiffs = 0;
1615                 
1616                 for(int i=0;i<length;i++){
1617                                                                 
1618                         if(oligo[i] != seq[i]){
1619                                 if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
1620                                 else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
1621                                 else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
1622                                 else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
1623                                 else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
1624                                 else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
1625                                 else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
1626                                 else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
1627                                 else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1628                                 else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
1629                                 else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
1630                                 else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
1631                         }
1632                         
1633                 }
1634                 
1635                 return countDiffs;
1636         }
1637         catch(exception& e) {
1638                 m->errorOut(e, "TrimSeqsCommand", "countDiffs");
1639                 exit(1);
1640         }
1641
1642 }
1643
1644 //***************************************************************************************************************